diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -419,10 +419,6 @@ return false; } - AMDGPU::Waitcnt allZeroWaitcnt() const { - return AMDGPU::Waitcnt::allZero(ST->hasVscnt()); - } - void setForceEmitWaitcnt() { // For non-debug builds, ForceEmitWaitcnt has been initialized to false; // For debug builds, get the debug counter info and adjust if need be @@ -1036,7 +1032,7 @@ MI.getOpcode() == AMDGPU::SI_RETURN || MI.getOpcode() == AMDGPU::S_SETPC_B64_return || (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { - Wait = Wait.combined(allZeroWaitcnt()); + Wait = Wait.combined(AMDGPU::Waitcnt::allZeroExceptVsCnt()); } // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM // stores. In this case it can be useful to send a message to explicitly @@ -1232,7 +1228,7 @@ // cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here. if (MI.getOpcode() == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) { - Wait = Wait.combined(allZeroWaitcnt()); + Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } // TODO: Remove this work-around, enable the assert for Bug 457939 @@ -1248,7 +1244,7 @@ ScoreBrackets.simplifyWaitcnt(Wait); if (ForceEmitZeroWaitcnts) - Wait = allZeroWaitcnt(); + Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt(); if (ForceEmitWaitcnt[VM_CNT]) Wait.VmCnt = 0; @@ -1256,8 +1252,6 @@ Wait.ExpCnt = 0; if (ForceEmitWaitcnt[LGKM_CNT]) Wait.LgkmCnt = 0; - if (ForceEmitWaitcnt[VS_CNT]) - Wait.VsCnt = 0; if (FlushVmCnt) { if (ScoreBrackets.hasPendingEvent(VM_CNT)) @@ -1480,7 +1474,7 @@ } else if (Inst.isCall()) { if (callWaitsOnFunctionReturn(Inst)) { // Act as a wait on everything - ScoreBrackets->applyWaitcnt(allZeroWaitcnt()); + ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt()); } else { // May need to way wait for anything. ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt()); @@ -1862,10 +1856,6 @@ I != E && (I->isPHI() || I->isMetaInstruction()); ++I) ; BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); - if (ST->hasVscnt()) - BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(0); Modified = true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll @@ -30,7 +30,6 @@ ; GFX10-LABEL: v_add_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, %b @@ -68,7 +67,6 @@ ; GFX10-LABEL: v_add_v2i16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -108,7 +106,6 @@ ; GFX10-LABEL: v_add_v2i16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.b = fneg <2 x half> %b @@ -154,7 +151,6 @@ ; GFX10-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -193,7 +189,6 @@ ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, 0xffc0, v0 op_sel_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, @@ -227,7 +222,6 @@ ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_lo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, 0x4ffc0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, @@ -261,7 +255,6 @@ ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_hi: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, 0xffc00004, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -106,7 +106,6 @@ ; GFX10PLUS-LABEL: v_andn2_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -257,7 +256,6 @@ ; GFX10PLUS-LABEL: v_andn2_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2 ; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2 @@ -452,7 +450,6 @@ ; GFX10PLUS-LABEL: v_andn2_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -679,7 +676,6 @@ ; GFX10PLUS-LABEL: v_andn2_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -904,7 +900,6 @@ ; GFX10PLUS-LABEL: v_andn2_v3i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -11, v3 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2 @@ -1141,7 +1136,6 @@ ; GFX10PLUS-LABEL: v_andn2_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: icmp_v2i32_sext_to_v2i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 @@ -24,7 +23,6 @@ ; CHECK-LABEL: icmp_v2i32_zext_to_v2i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll @@ -30,7 +30,6 @@ ; GFX10PLUS-LABEL: v_ashr_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0 @@ -64,7 +63,6 @@ ; GFX10PLUS-LABEL: v_ashr_i8_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 7, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -132,7 +130,6 @@ ; GFX10PLUS-LABEL: v_ashr_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0 @@ -152,7 +149,6 @@ ; GFX10PLUS-LABEL: v_ashr_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 7, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -202,7 +198,6 @@ ; GFX10PLUS-LABEL: v_ashr_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i32 %value, %amount @@ -219,7 +214,6 @@ ; GFX10PLUS-LABEL: v_ashr_i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i32 %value, 31 @@ -305,7 +299,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v2, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -324,7 +317,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -360,7 +352,6 @@ ; GFX10PLUS-LABEL: v_ashr_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v3, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v4, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v5, v2 @@ -400,7 +391,6 @@ ; GFX10PLUS-LABEL: v_ashr_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v4, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v5, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v6, v2 @@ -444,7 +434,6 @@ ; GFX10PLUS-LABEL: v_ashr_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v5, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v6, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v7, v2 @@ -504,7 +493,6 @@ ; GFX10-LABEL: v_ashr_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_ashrrev_i32_e32 v0, v16, v0 ; GFX10-NEXT: v_ashrrev_i32_e32 v1, v17, v1 @@ -528,7 +516,6 @@ ; GFX11-LABEL: v_ashr_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_ashrrev_i32_e32 v0, v16, v0 ; GFX11-NEXT: v_ashrrev_i32_e32 v1, v17, v1 @@ -620,7 +607,6 @@ ; GFX10PLUS-LABEL: v_ashr_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i16 %value, %amount @@ -650,7 +636,6 @@ ; GFX10PLUS-LABEL: v_ashr_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 15, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i16 %value, 15 @@ -787,7 +772,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr <2 x i16> %value, %amount @@ -822,7 +806,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr <2 x i16> %value, @@ -1015,7 +998,6 @@ ; GFX10PLUS-LABEL: v_ashr_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v2, v0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1205,7 +1187,6 @@ ; GFX10PLUS-LABEL: v_ashr_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v4, v0 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v5, v1 ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, v6, v2 @@ -1383,7 +1364,6 @@ ; GFX10PLUS-LABEL: v_ashr_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i64 %value, %amount @@ -1401,7 +1381,6 @@ ; GFX10PLUS-LABEL: v_ashr_i64_63: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v1 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1421,7 +1400,6 @@ ; GFX10PLUS-LABEL: v_ashr_i64_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 1, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1440,7 +1418,6 @@ ; GFX10PLUS-LABEL: v_ashr_i64_32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1470,7 +1447,6 @@ ; GFX10PLUS-LABEL: v_ashr_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = ashr i64 %value, 31 @@ -1629,7 +1605,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1] ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1662,7 +1637,6 @@ ; GFX10PLUS-LABEL: v_ashr_v2i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1759,7 +1733,6 @@ ; GFX10-LABEL: v_ashr_i65: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3 @@ -1783,7 +1756,6 @@ ; GFX11-LABEL: v_ashr_i65: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3 @@ -1847,7 +1819,6 @@ ; GFX10PLUS-LABEL: v_ashr_i65_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll @@ -20,7 +20,6 @@ ; WAVE32-LABEL: br_false: ; WAVE32: ; %bb.0: ; %.exit ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-NEXT: .LBB0_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: s_mov_b32 s4, 1 @@ -53,7 +52,6 @@ ; WAVE32-LABEL: br_true: ; WAVE32: ; %bb.0: ; %.exit ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-NEXT: .LBB1_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: s_mov_b32 s4, 0 @@ -87,7 +85,6 @@ ; WAVE32-LABEL: br_undef: ; WAVE32: ; %bb.0: ; %.exit ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-NEXT: .LBB2_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: ; implicit-def: $sgpr4 @@ -122,7 +119,6 @@ ; WAVE32-LABEL: br_poison: ; WAVE32: ; %bb.0: ; %.exit ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-NEXT: .LBB3_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: ; implicit-def: $sgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll @@ -67,7 +67,6 @@ ; GFX10-LABEL: v_bswap_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x10203 ; GFX10-NEXT: s_setpc_b64 s[30:31] %bswap = call i32 @llvm.bswap.i32(i32 %src) @@ -153,7 +152,6 @@ ; GFX10-LABEL: v_bswap_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x10203 ; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x10203 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -243,7 +241,6 @@ ; GFX10-LABEL: v_bswap_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v2, 0, v1, 0x10203 ; GFX10-NEXT: v_perm_b32 v1, 0, v0, 0x10203 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -371,7 +368,6 @@ ; GFX10-LABEL: v_bswap_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v4, 0, v1, 0x10203 ; GFX10-NEXT: v_perm_b32 v5, 0, v3, 0x10203 ; GFX10-NEXT: v_perm_b32 v1, 0, v0, 0x10203 @@ -442,7 +438,6 @@ ; GFX10-LABEL: v_bswap_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX10-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) @@ -517,7 +512,6 @@ ; GFX10-LABEL: v_bswap_i16_zext_to_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX10-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) @@ -554,7 +548,6 @@ ; GFX10-LABEL: v_bswap_i16_sext_to_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -592,7 +585,6 @@ ; GFX10-LABEL: v_bswap_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x2030001 ; GFX10-NEXT: s_setpc_b64 s[30:31] %bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src) @@ -635,7 +627,6 @@ ; GFX10-LABEL: v_bswap_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x2030001 ; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x2030001 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -678,7 +669,6 @@ ; GFX10-LABEL: v_bswap_i48: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x10203 ; GFX10-NEXT: v_perm_b32 v2, 0, v0, 0x10203 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll @@ -5,12 +5,10 @@ ; GFX10-LABEL: value_finder_bug: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[1:4], v[1:2], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen ; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4 %vec.3 = extractelement <4 x float> %vec, i32 3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll @@ -5,7 +5,6 @@ ; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 @@ -17,7 +16,6 @@ ; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul half %a, 2.0 @@ -30,7 +28,6 @@ ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 ; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -44,7 +41,6 @@ ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 @@ -58,7 +54,6 @@ ; GFX10-LABEL: test_fmed3_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 @@ -75,7 +70,6 @@ ; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -89,7 +83,6 @@ ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 0x41200000, v0 ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 @@ -104,7 +97,6 @@ ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll @@ -5,7 +5,6 @@ ; GFX10-LABEL: test_min_max_ValK0_K1_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 @@ -18,7 +17,6 @@ ; GFX10-LABEL: test_min_max_K0Val_K1_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul double %a, 2.0 @@ -32,7 +30,6 @@ ; GFX10-LABEL: test_min_K1max_ValK0_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul half %a, 2.0 @@ -45,7 +42,6 @@ ; GFX10-LABEL: test_min_K1max_K0Val_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul <2 x half> %a, @@ -58,7 +54,6 @@ ; GFX10-LABEL: test_min_max_splat_padded_with_undef: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul <2 x half> %a, @@ -73,7 +68,6 @@ ; GFX10-LABEL: test_max_min_ValK1_K0_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 @@ -86,7 +80,6 @@ ; GFX10-LABEL: test_max_min_K1Val_K0_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul double %a, 2.0 @@ -99,7 +92,6 @@ ; GFX10-LABEL: test_max_K0min_ValK1_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul half %a, 2.0 @@ -113,7 +105,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %fmul = fmul <2 x half> %a, @@ -128,7 +119,6 @@ ; GFX10-LABEL: test_min_max_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0) @@ -140,7 +130,6 @@ ; GFX10-LABEL: test_max_min_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %minnum = call float @llvm.minnum.f32(float %a, float 1.0) @@ -157,7 +146,6 @@ ; GFX10-LABEL: test_min_max_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -171,7 +159,6 @@ ; GFX10-LABEL: test_max_min_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_f32_e32 v0, 0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -187,7 +174,6 @@ ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 @@ -203,7 +189,6 @@ ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -219,7 +204,6 @@ ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 @@ -234,7 +218,6 @@ ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll @@ -28,7 +28,6 @@ ; GFX10-CONTRACT-LABEL: test_f32_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 @@ -37,7 +36,6 @@ ; GFX10-DENORM-LABEL: test_f32_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 @@ -46,7 +44,6 @@ ; GFX11-CONTRACT-LABEL: test_f32_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 @@ -56,7 +53,6 @@ ; GFX11-DENORM-LABEL: test_f32_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 @@ -88,7 +84,6 @@ ; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 @@ -97,7 +92,6 @@ ; GFX10-DENORM-LABEL: test_f32_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 @@ -106,7 +100,6 @@ ; GFX11-CONTRACT-LABEL: test_f32_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 @@ -116,7 +109,6 @@ ; GFX11-DENORM-LABEL: test_f32_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 @@ -148,7 +140,6 @@ ; GFX10-CONTRACT-LABEL: test_half_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4 ; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 @@ -157,7 +148,6 @@ ; GFX10-DENORM-LABEL: test_half_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 @@ -167,7 +157,6 @@ ; GFX11-CONTRACT-LABEL: test_half_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 @@ -177,7 +166,6 @@ ; GFX11-DENORM-LABEL: test_half_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -210,7 +198,6 @@ ; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4 ; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 @@ -219,7 +206,6 @@ ; GFX10-DENORM-LABEL: test_half_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 @@ -229,7 +215,6 @@ ; GFX11-CONTRACT-LABEL: test_half_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 @@ -239,7 +224,6 @@ ; GFX11-DENORM-LABEL: test_half_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -271,7 +255,6 @@ ; GFX10-CONTRACT-LABEL: test_double_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -279,7 +262,6 @@ ; GFX10-DENORM-LABEL: test_double_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -287,7 +269,6 @@ ; GFX11-CONTRACT-LABEL: test_double_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -296,7 +277,6 @@ ; GFX11-DENORM-LABEL: test_double_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -326,7 +306,6 @@ ; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -334,7 +313,6 @@ ; GFX10-DENORM-LABEL: test_double_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -342,7 +320,6 @@ ; GFX11-CONTRACT-LABEL: test_double_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -351,7 +328,6 @@ ; GFX11-DENORM-LABEL: test_double_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9] ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -397,7 +373,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f32_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX10-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX10-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -415,7 +390,6 @@ ; GFX10-DENORM-LABEL: test_v4f32_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX10-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX10-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -433,7 +407,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f32_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX11-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX11-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -449,7 +422,6 @@ ; GFX11-DENORM-LABEL: test_v4f32_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX11-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX11-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -502,7 +474,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f32_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX10-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX10-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -520,7 +491,6 @@ ; GFX10-DENORM-LABEL: test_v4f32_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX10-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX10-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -538,7 +508,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f32_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX11-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX11-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -554,7 +523,6 @@ ; GFX11-DENORM-LABEL: test_v4f32_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16 ; GFX11-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17 ; GFX11-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18 @@ -599,7 +567,6 @@ ; GFX10-CONTRACT-LABEL: test_f16_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 @@ -609,7 +576,6 @@ ; GFX10-DENORM-LABEL: test_f16_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 @@ -623,7 +589,6 @@ ; GFX11-CONTRACT-LABEL: test_f16_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -634,7 +599,6 @@ ; GFX11-DENORM-LABEL: test_f16_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 @@ -679,7 +643,6 @@ ; GFX10-CONTRACT-LABEL: test_f16_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 @@ -689,7 +652,6 @@ ; GFX10-DENORM-LABEL: test_f16_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 @@ -703,7 +665,6 @@ ; GFX11-CONTRACT-LABEL: test_f16_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9 ; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -714,7 +675,6 @@ ; GFX11-DENORM-LABEL: test_f16_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 @@ -789,7 +749,6 @@ ; GFX10-CONTRACT-LABEL: test_f64_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: s_clause 0x8 ; GFX10-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 @@ -817,7 +776,6 @@ ; GFX10-DENORM-LABEL: test_f64_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: s_clause 0x8 ; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 @@ -845,7 +803,6 @@ ; GFX11-CONTRACT-LABEL: test_f64_add_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: s_clause 0x8 ; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -875,7 +832,6 @@ ; GFX11-DENORM-LABEL: test_f64_add_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: s_clause 0x8 ; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -964,7 +920,6 @@ ; GFX10-CONTRACT-LABEL: test_f64_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: s_clause 0x8 ; GFX10-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 @@ -992,7 +947,6 @@ ; GFX10-DENORM-LABEL: test_f64_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: s_clause 0x8 ; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 @@ -1020,7 +974,6 @@ ; GFX11-CONTRACT-LABEL: test_f64_add_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: s_clause 0x8 ; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -1050,7 +1003,6 @@ ; GFX11-DENORM-LABEL: test_f64_add_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: s_clause 0x8 ; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -37,7 +37,6 @@ ; GFX10-LABEL: test_f32_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -45,21 +44,18 @@ ; GFX10-CONTRACT-LABEL: test_f32_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f32_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_f32_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -97,7 +93,6 @@ ; GFX10-LABEL: test_f32_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_add_f32_e32 v0, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -105,21 +100,18 @@ ; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f32_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -166,7 +158,6 @@ ; GFX10-LABEL: test_add_mul_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -176,7 +167,6 @@ ; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) ; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1 @@ -186,7 +176,6 @@ ; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) ; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 @@ -196,7 +185,6 @@ ; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) ; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v3, v0, v1 @@ -248,7 +236,6 @@ ; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -258,7 +245,6 @@ ; GFX10-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) ; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1 @@ -268,7 +254,6 @@ ; GFX10-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) ; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 @@ -278,7 +263,6 @@ ; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) ; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v3, v0, v1 @@ -321,7 +305,6 @@ ; GFX10-LABEL: test_half_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -329,14 +312,12 @@ ; GFX10-CONTRACT-LABEL: test_half_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_half_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -344,7 +325,6 @@ ; GFX10-UNSAFE-LABEL: test_half_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -382,7 +362,6 @@ ; GFX10-LABEL: test_half_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: v_add_f16_e32 v0, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -390,14 +369,12 @@ ; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_half_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -405,7 +382,6 @@ ; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -444,7 +420,6 @@ ; GFX10-LABEL: test_double_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -452,14 +427,12 @@ ; GFX10-CONTRACT-LABEL: test_double_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_double_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -467,7 +440,6 @@ ; GFX10-UNSAFE-LABEL: test_double_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -506,7 +478,6 @@ ; GFX10-LABEL: test_double_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -514,14 +485,12 @@ ; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_double_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -529,7 +498,6 @@ ; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: @@ -582,7 +550,6 @@ ; GFX10-LABEL: test_4xfloat_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6 @@ -596,7 +563,6 @@ ; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 @@ -606,7 +572,6 @@ ; GFX10-DENORM-LABEL: test_4xfloat_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 @@ -616,7 +581,6 @@ ; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 ; GFX10-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9 ; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10 @@ -667,7 +631,6 @@ ; GFX10-LABEL: test_3xfloat_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5 @@ -679,7 +642,6 @@ ; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 @@ -688,7 +650,6 @@ ; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 @@ -697,7 +658,6 @@ ; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 ; GFX10-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7 ; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 @@ -744,7 +704,6 @@ ; GFX10-LABEL: test_4xhalf_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_pk_add_f16 v0, v0, v4 @@ -754,7 +713,6 @@ ; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -762,7 +720,6 @@ ; GFX10-DENORM-LABEL: test_4xhalf_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 @@ -772,7 +729,6 @@ ; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] @@ -818,7 +774,6 @@ ; GFX10-LABEL: test_3xhalf_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_pk_add_f16 v0, v4, v0 @@ -828,7 +783,6 @@ ; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -836,7 +790,6 @@ ; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 @@ -846,7 +799,6 @@ ; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] @@ -904,7 +856,6 @@ ; GFX10-LABEL: test_4xdouble_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -918,7 +869,6 @@ ; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] @@ -928,7 +878,6 @@ ; GFX10-DENORM-LABEL: test_4xdouble_add_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -942,7 +891,6 @@ ; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] ; GFX10-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] ; GFX10-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] @@ -996,7 +944,6 @@ ; GFX10-LABEL: test_3xdouble_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] @@ -1008,7 +955,6 @@ ; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] @@ -1017,7 +963,6 @@ ; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] @@ -1029,7 +974,6 @@ ; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] ; GFX10-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] ; GFX10-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll @@ -34,7 +34,6 @@ ; GFX10-LABEL: test_f32_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -42,28 +41,24 @@ ; GFX10-CONTRACT-LABEL: test_f32_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f32_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-CONTRACT-LABEL: test_f32_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_f32_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -97,7 +92,6 @@ ; GFX10-LABEL: test_f32_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_sub_f32_e32 v0, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -105,28 +99,24 @@ ; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-CONTRACT-LABEL: test_f32_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_f32_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v2, v0 @@ -160,7 +150,6 @@ ; GFX10-LABEL: test_half_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -168,14 +157,12 @@ ; GFX10-CONTRACT-LABEL: test_half_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_half_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -183,14 +170,12 @@ ; GFX11-CONTRACT-LABEL: test_half_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_half_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2 @@ -224,7 +209,6 @@ ; GFX10-LABEL: test_half_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: v_sub_f16_e32 v0, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -232,14 +216,12 @@ ; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_half_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -247,14 +229,12 @@ ; GFX11-CONTRACT-LABEL: test_half_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_half_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0 @@ -289,7 +269,6 @@ ; GFX10-LABEL: test_double_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -297,14 +276,12 @@ ; GFX10-CONTRACT-LABEL: test_double_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_double_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -312,14 +289,12 @@ ; GFX11-CONTRACT-LABEL: test_double_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_double_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] @@ -354,7 +329,6 @@ ; GFX10-LABEL: test_double_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -362,14 +336,12 @@ ; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_double_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -377,14 +349,12 @@ ; GFX11-CONTRACT-LABEL: test_double_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5] ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-DENORM-LABEL: test_double_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1] @@ -430,7 +400,6 @@ ; GFX10-LABEL: test_v4f32_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6 @@ -444,7 +413,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10 @@ -454,7 +422,6 @@ ; GFX10-DENORM-LABEL: test_v4f32_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10 @@ -464,7 +431,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8 ; GFX11-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10 @@ -474,7 +440,6 @@ ; GFX11-DENORM-LABEL: test_v4f32_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -522,7 +487,6 @@ ; GFX10-LABEL: test_v4f32_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6 @@ -536,7 +500,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10 @@ -546,7 +509,6 @@ ; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8 ; GFX10-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9 ; GFX10-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10 @@ -556,7 +518,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8 ; GFX11-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9 ; GFX11-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10 @@ -566,7 +527,6 @@ ; GFX11-DENORM-LABEL: test_v4f32_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5 ; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7 ; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -616,7 +576,6 @@ ; GFX10-LABEL: test_v4f16_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4 @@ -630,7 +589,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -638,7 +596,6 @@ ; GFX10-DENORM-LABEL: test_v4f16_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4 @@ -652,7 +609,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -660,7 +616,6 @@ ; GFX11-DENORM-LABEL: test_v4f16_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 @@ -720,7 +675,6 @@ ; GFX10-LABEL: test_v4f16_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0 @@ -734,7 +688,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -742,7 +695,6 @@ ; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0 @@ -756,7 +708,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -764,7 +715,6 @@ ; GFX11-DENORM-LABEL: test_v4f16_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 @@ -826,7 +776,6 @@ ; GFX10-LABEL: test_v4f64_sub_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -840,7 +789,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21] @@ -850,7 +798,6 @@ ; GFX10-DENORM-LABEL: test_v4f64_sub_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -864,7 +811,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17] ; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19] ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21] @@ -874,7 +820,6 @@ ; GFX11-DENORM-LABEL: test_v4f64_sub_mul: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -931,7 +876,6 @@ ; GFX10-LABEL: test_v4f64_sub_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -945,7 +889,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21] @@ -955,7 +898,6 @@ ; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -969,7 +911,6 @@ ; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul_rhs: ; GFX11-CONTRACT: ; %bb.0: ; %.entry ; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17] ; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19] ; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21] @@ -979,7 +920,6 @@ ; GFX11-DENORM-LABEL: test_v4f64_sub_mul_rhs: ; GFX11-DENORM: ; %bb.0: ; %.entry ; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll @@ -30,7 +30,6 @@ ; GFX10-LABEL: test_f32_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v1 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -38,14 +37,12 @@ ; GFX10-CONTRACT-LABEL: test_f32_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] entry: @@ -78,7 +75,6 @@ ; GFX10-LABEL: test_f16_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -86,14 +82,12 @@ ; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -128,7 +122,6 @@ ; GFX10-LABEL: test_f64_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -136,14 +129,12 @@ ; GFX10-CONTRACT-LABEL: test_f64_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_f64_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] @@ -190,7 +181,6 @@ ; GFX10-LABEL: test_v4f32_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v4 ; GFX10-NEXT: v_mul_f32_e64 v1, v1, -v5 ; GFX10-NEXT: v_mul_f32_e64 v2, v2, -v6 @@ -204,7 +194,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8 ; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9 ; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10 @@ -214,7 +203,6 @@ ; GFX10-DENORM-LABEL: test_v4f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10 @@ -264,7 +252,6 @@ ; GFX10-LABEL: test_v4f16_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4 @@ -278,7 +265,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1] ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1] ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] @@ -286,7 +272,6 @@ ; GFX10-DENORM-LABEL: test_v4f16_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4 @@ -342,7 +327,6 @@ ; GFX10-LABEL: test_v4f64_sub_ext_neg_mul: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] @@ -356,7 +340,6 @@ ; GFX10-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul: ; GFX10-CONTRACT: ; %bb.0: ; %entry ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19] ; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21] @@ -366,7 +349,6 @@ ; GFX10-DENORM-LABEL: test_v4f64_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] ; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -89,7 +89,6 @@ ; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -109,13 +108,11 @@ ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -136,7 +133,6 @@ ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: s_add_i32 s32, s32, -16 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, ptr addrspace(4) @gv, align 4 %alloca = alloca i32, i32 %n, addrspace(5) @@ -228,7 +224,6 @@ ; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -248,13 +243,11 @@ ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_add_i32 s32, s32, 16 @@ -275,7 +268,6 @@ ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: s_add_i32 s32, s32, -16 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, ptr addrspace(4) @gv, align 16 %alloca = alloca i32, i32 %n, addrspace(5) @@ -372,7 +364,6 @@ ; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0 ; GFX10-NEXT: s_addk_i32 s32, 0x800 @@ -394,13 +385,11 @@ ; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 31 ; GFX11-NEXT: s_add_i32 s32, s32, 64 @@ -423,7 +412,6 @@ ; GFX11-NEXT: s_addk_i32 s32, 0xffc0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xfffffc00 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, ptr addrspace(4) @gv %alloca = alloca i32, i32 %n, align 32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -338,7 +338,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i128_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 @@ -397,7 +396,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i128_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_load_b128 v[16:19], v[0:1], off ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 @@ -948,7 +946,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i128_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -956,7 +953,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i128_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -995,7 +991,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1003,7 +998,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1042,7 +1036,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1050,7 +1043,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1089,7 +1081,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1097,7 +1088,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll @@ -163,7 +163,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i16_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2 @@ -177,7 +176,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i16_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2 @@ -369,7 +367,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i16_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -377,7 +374,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i16_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -414,7 +410,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i16_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -423,7 +418,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i16_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -461,7 +455,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i16_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v1 @@ -470,7 +463,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i16_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 @@ -508,7 +500,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i16_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -517,7 +508,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i16_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -736,7 +726,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2 @@ -754,7 +743,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2 @@ -1074,7 +1062,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1082,7 +1069,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1122,7 +1108,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1131,7 +1116,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1172,7 +1156,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v1 @@ -1181,7 +1164,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 @@ -1222,7 +1204,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -1231,7 +1212,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -1272,7 +1252,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -1281,7 +1260,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 @@ -1322,7 +1300,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2 @@ -1331,7 +1308,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 @@ -1372,7 +1348,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx6: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v3 @@ -1381,7 +1356,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx6: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 @@ -1422,7 +1396,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i16_idx7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3 @@ -1431,7 +1404,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i16_idx7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll @@ -134,7 +134,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_and_b32_e32 v1, 3, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 @@ -145,7 +144,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_and_b32_e32 v1, 3, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -344,7 +342,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i8_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -352,7 +349,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i8_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -392,7 +388,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i8_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -401,7 +396,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i8_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -442,7 +436,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i8_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -451,7 +444,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i8_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -492,7 +484,6 @@ ; GFX10-LABEL: extractelement_vgpr_v4i8_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -501,7 +492,6 @@ ; GFX11-LABEL: extractelement_vgpr_v4i8_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -675,7 +665,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 @@ -689,7 +678,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 2, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 @@ -988,7 +976,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -996,7 +983,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1036,7 +1022,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -1045,7 +1030,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -1086,7 +1070,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1095,7 +1078,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1136,7 +1118,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -1145,7 +1126,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -1186,7 +1166,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v1 @@ -1195,7 +1174,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 @@ -1236,7 +1214,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 @@ -1245,7 +1222,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 @@ -1286,7 +1262,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx6: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -1295,7 +1270,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx6: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -1336,7 +1310,6 @@ ; GFX10-LABEL: extractelement_vgpr_v8i8_idx7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 @@ -1345,7 +1318,6 @@ ; GFX11-LABEL: extractelement_vgpr_v8i8_idx7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 @@ -1564,7 +1536,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 2, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 @@ -1582,7 +1553,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 2, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 @@ -1697,7 +1667,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1705,7 +1674,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1745,7 +1713,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -1754,7 +1721,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 @@ -1795,7 +1761,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1804,7 +1769,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1845,7 +1809,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -1854,7 +1817,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 @@ -1895,7 +1857,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v1 @@ -1904,7 +1865,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 @@ -1945,7 +1905,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 @@ -1954,7 +1913,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 @@ -1995,7 +1953,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx6: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -2004,7 +1961,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx6: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 @@ -2045,7 +2001,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 @@ -2054,7 +2009,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 @@ -2095,7 +2049,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -2104,7 +2057,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 @@ -2145,7 +2097,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx9: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2 @@ -2154,7 +2105,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx9: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v2 @@ -2195,7 +2145,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx10: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2 @@ -2204,7 +2153,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 @@ -2245,7 +2193,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx11: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2 @@ -2254,7 +2201,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx11: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 @@ -2295,7 +2241,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx12: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v3 @@ -2304,7 +2249,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx12: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 @@ -2345,7 +2289,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx13: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3 @@ -2354,7 +2297,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx13: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v3 @@ -2395,7 +2337,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx14: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3 @@ -2404,7 +2345,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx14: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 @@ -2445,7 +2385,6 @@ ; GFX10-LABEL: extractelement_vgpr_v16i8_idx15: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v3 @@ -2454,7 +2393,6 @@ ; GFX11-LABEL: extractelement_vgpr_v16i8_idx15: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -32,7 +32,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 @@ -180,7 +179,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 @@ -344,7 +342,6 @@ ; GFX10-LABEL: dyn_extract_v8i64_const_s_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[6:7], 2 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, s6 @@ -381,7 +378,6 @@ ; GFX11-LABEL: dyn_extract_v8i64_const_s_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b64 s[2:3], 2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 @@ -724,7 +720,6 @@ ; GFX10-LABEL: dyn_extract_v8i64_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo @@ -751,7 +746,6 @@ ; GFX11-LABEL: dyn_extract_v8i64_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 @@ -1019,7 +1013,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -1523,7 +1516,6 @@ ; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo @@ -1551,7 +1543,6 @@ ; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 @@ -1597,7 +1588,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 @@ -1723,7 +1713,6 @@ ; GFX10-LABEL: dyn_extract_v8p1_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo @@ -1750,7 +1739,6 @@ ; GFX11-LABEL: dyn_extract_v8p1_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 @@ -2227,7 +2215,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6 @@ -2378,7 +2365,6 @@ ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 @@ -2612,7 +2598,6 @@ ; GFX10-LABEL: dyn_extract_v6f64_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo @@ -2633,7 +2618,6 @@ ; GFX11-LABEL: dyn_extract_v6f64_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 @@ -2898,7 +2882,6 @@ ; GFX10-LABEL: dyn_extract_v7f64_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo @@ -2925,7 +2908,6 @@ ; GFX11-LABEL: dyn_extract_v7f64_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 @@ -3449,7 +3431,6 @@ ; GFX10-LABEL: dyn_extract_v15f32_const_s_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 @@ -3485,7 +3466,6 @@ ; GFX11-LABEL: dyn_extract_v15f32_const_s_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 @@ -3699,7 +3679,6 @@ ; GFX10-LABEL: dyn_extract_v15f32_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 @@ -3735,7 +3714,6 @@ ; GFX11-LABEL: dyn_extract_v15f32_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 @@ -3971,7 +3949,6 @@ ; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4008,7 +3985,6 @@ ; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4772,7 +4748,6 @@ ; GFX10-LABEL: v_extract_v64i32_7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v7 @@ -4781,7 +4756,6 @@ ; GFX11-LABEL: v_extract_v64i32_7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v7 @@ -4814,7 +4788,6 @@ ; GFX10-LABEL: v_extract_v64i32_32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -4822,7 +4795,6 @@ ; GFX11-LABEL: v_extract_v64i32_32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -4856,7 +4828,6 @@ ; GFX10-LABEL: v_extract_v64i32_33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v1 @@ -4865,7 +4836,6 @@ ; GFX11-LABEL: v_extract_v64i32_33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 @@ -4897,7 +4867,6 @@ ; GFX10-LABEL: v_extract_v64i32_37: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v5 @@ -4906,7 +4875,6 @@ ; GFX11-LABEL: v_extract_v64i32_37: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll @@ -71,7 +71,6 @@ ; GFX10-LABEL: v_fdiv_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 @@ -83,7 +82,6 @@ ; GFX11-LABEL: v_fdiv_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 @@ -117,7 +115,6 @@ ; GFX10-LABEL: v_fdiv_f16_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v1, v1 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -125,7 +122,6 @@ ; GFX11-LABEL: v_fdiv_f16_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 @@ -190,7 +186,6 @@ ; GFX10-LABEL: v_fdiv_f16_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 @@ -202,7 +197,6 @@ ; GFX11-LABEL: v_fdiv_f16_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 @@ -271,7 +265,6 @@ ; GFX10-LABEL: v_rcp_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX10-NEXT: v_rcp_f32_e32 v1, v1 @@ -283,7 +276,6 @@ ; GFX11-LABEL: v_rcp_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 @@ -352,7 +344,6 @@ ; GFX10-LABEL: v_rcp_f16_arcp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX10-NEXT: v_rcp_f32_e32 v1, v1 @@ -364,7 +355,6 @@ ; GFX11-LABEL: v_rcp_f16_arcp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 @@ -397,7 +387,6 @@ ; GFX10PLUS-LABEL: v_rcp_f16_arcp_afn: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fdiv = fdiv arcp afn half 1.0, %x @@ -455,7 +444,6 @@ ; GFX10PLUS-LABEL: v_rcp_f16_ulp25: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fdiv = fdiv half 1.0, %x, !fpmath !0 @@ -483,7 +471,6 @@ ; GFX10-LABEL: v_fdiv_f16_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v1, v1 ; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -491,7 +478,6 @@ ; GFX11-LABEL: v_fdiv_f16_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 @@ -556,7 +542,6 @@ ; GFX10-LABEL: v_fdiv_f16_arcp_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 @@ -568,7 +553,6 @@ ; GFX11-LABEL: v_fdiv_f16_arcp_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 @@ -697,7 +681,6 @@ ; GFX10-LABEL: v_fdiv_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -718,7 +701,6 @@ ; GFX11-LABEL: v_fdiv_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -778,7 +760,6 @@ ; GFX10-LABEL: v_fdiv_v2f16_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v2, v1 ; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2 @@ -789,7 +770,6 @@ ; GFX11-LABEL: v_fdiv_v2f16_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 @@ -919,7 +899,6 @@ ; GFX10-LABEL: v_fdiv_v2f16_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -940,7 +919,6 @@ ; GFX11-LABEL: v_fdiv_v2f16_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -1072,7 +1050,6 @@ ; GFX10-LABEL: v_rcp_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0 @@ -1091,7 +1068,6 @@ ; GFX11-LABEL: v_rcp_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0 @@ -1222,7 +1198,6 @@ ; GFX10-LABEL: v_rcp_v2f16_arcp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0 @@ -1241,7 +1216,6 @@ ; GFX11-LABEL: v_rcp_v2f16_arcp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0 @@ -1295,7 +1269,6 @@ ; GFX10-LABEL: v_rcp_v2f16_arcp_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v1, v0 ; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0 @@ -1304,7 +1277,6 @@ ; GFX11-LABEL: v_rcp_v2f16_arcp_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_rcp_f16_e32 v0, v0 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 @@ -1405,7 +1377,6 @@ ; GFX10-LABEL: v_rcp_v2f16_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v1, v0 ; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0 @@ -1414,7 +1385,6 @@ ; GFX11-LABEL: v_rcp_v2f16_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_rcp_f16_e32 v0, v0 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 @@ -1464,7 +1434,6 @@ ; GFX10-LABEL: v_fdiv_v2f16_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v2, v1 ; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2 @@ -1475,7 +1444,6 @@ ; GFX11-LABEL: v_fdiv_v2f16_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 @@ -1605,7 +1573,6 @@ ; GFX10-LABEL: v_fdiv_v2f16_arcp_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -1626,7 +1593,6 @@ ; GFX11-LABEL: v_fdiv_v2f16_arcp_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0 @@ -1686,7 +1652,6 @@ ; GFX10-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f16_e32 v2, v1 ; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2 @@ -1697,7 +1662,6 @@ ; GFX11-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-NEXT: v_rcp_f16_e32 v1, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -86,7 +86,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_f32: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 @@ -103,7 +102,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_f32: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -122,7 +120,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_f32: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -144,7 +141,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_f32: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -179,7 +175,6 @@ ; GFX10-LABEL: v_fdiv_f32_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -187,7 +182,6 @@ ; GFX11-LABEL: v_fdiv_f32_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 @@ -245,7 +239,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_f32_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 @@ -262,7 +255,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_f32_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| ; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 ; GFX10-FLUSH-NEXT: v_mul_f32_e32 v1, v1, v2 @@ -274,7 +266,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_f32_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -296,7 +287,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_f32_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 @@ -383,7 +373,6 @@ ; GFX10-IEEE-LABEL: v_rcp_f32: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 @@ -400,7 +389,6 @@ ; GFX10-FLUSH-LABEL: v_rcp_f32: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 @@ -419,7 +407,6 @@ ; GFX11-IEEE-LABEL: v_rcp_f32: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -441,7 +428,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_f32: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -537,7 +523,6 @@ ; GFX10-IEEE-LABEL: v_rcp_f32_arcp: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 @@ -554,7 +539,6 @@ ; GFX10-FLUSH-LABEL: v_rcp_f32_arcp: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 @@ -573,7 +557,6 @@ ; GFX11-IEEE-LABEL: v_rcp_f32_arcp: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -595,7 +578,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_f32_arcp: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -629,14 +611,12 @@ ; GFX10-LABEL: v_rcp_f32_arcp_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_rcp_f32_arcp_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fdiv = fdiv arcp afn float 1.0, %x @@ -666,7 +646,6 @@ ; GFX10-IEEE-LABEL: v_rcp_f32_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v0| ; GFX10-IEEE-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s4 ; GFX10-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1 @@ -678,14 +657,12 @@ ; GFX10-FLUSH-LABEL: v_rcp_f32_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v0, v0 ; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-IEEE-LABEL: v_rcp_f32_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v0| ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-IEEE-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s0 @@ -700,7 +677,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_f32_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-FLUSH-NEXT: s_setpc_b64 s[30:31] %fdiv = fdiv float 1.0, %x, !fpmath !0 @@ -718,7 +694,6 @@ ; GFX10-LABEL: v_fdiv_f32_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -726,7 +701,6 @@ ; GFX11-LABEL: v_fdiv_f32_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 @@ -784,7 +758,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_f32_arcp_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 @@ -801,7 +774,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_f32_arcp_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| ; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 ; GFX10-FLUSH-NEXT: v_mul_f32_e32 v1, v1, v2 @@ -813,7 +785,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_f32_arcp_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -835,7 +806,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_f32_arcp_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 @@ -971,7 +941,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_v2f32: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -1000,7 +969,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_v2f32: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v4 @@ -1032,7 +1000,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_v2f32: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -1069,7 +1036,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_v2f32: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -1124,7 +1090,6 @@ ; GFX10-LABEL: v_fdiv_v2f32_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 ; GFX10-NEXT: v_rcp_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 @@ -1134,7 +1099,6 @@ ; GFX11-LABEL: v_fdiv_v2f32_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 ; GFX11-NEXT: v_rcp_f32_e32 v3, v3 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -1222,7 +1186,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_v2f32_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -1251,7 +1214,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_v2f32_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v2| ; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s4 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v3| @@ -1269,7 +1231,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_v2f32_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -1306,7 +1267,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_v2f32_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v2| ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s0 @@ -1446,7 +1406,6 @@ ; GFX10-IEEE-LABEL: v_rcp_v2f32: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 @@ -1475,7 +1434,6 @@ ; GFX10-FLUSH-LABEL: v_rcp_v2f32: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -1507,7 +1465,6 @@ ; GFX11-IEEE-LABEL: v_rcp_v2f32: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 @@ -1544,7 +1501,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_v2f32: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -1707,7 +1663,6 @@ ; GFX10-IEEE-LABEL: v_rcp_v2f32_arcp: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 @@ -1736,7 +1691,6 @@ ; GFX10-FLUSH-LABEL: v_rcp_v2f32_arcp: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 @@ -1768,7 +1722,6 @@ ; GFX11-IEEE-LABEL: v_rcp_v2f32_arcp: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 @@ -1805,7 +1758,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_v2f32_arcp: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -1858,7 +1810,6 @@ ; GFX10-LABEL: v_rcp_v2f32_arcp_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_f32_e32 v1, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1866,7 +1817,6 @@ ; GFX11-LABEL: v_rcp_v2f32_arcp_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1904,7 +1854,6 @@ ; GFX10-IEEE-LABEL: v_rcp_v2f32_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v0| ; GFX10-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 ; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| @@ -1922,7 +1871,6 @@ ; GFX10-FLUSH-LABEL: v_rcp_v2f32_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v0, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v1, v1 ; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31] @@ -1930,7 +1878,6 @@ ; GFX11-IEEE-LABEL: v_rcp_v2f32_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v0| ; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 @@ -1949,7 +1896,6 @@ ; GFX11-FLUSH-LABEL: v_rcp_v2f32_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-FLUSH-NEXT: s_setpc_b64 s[30:31] @@ -1970,7 +1916,6 @@ ; GFX10-LABEL: v_fdiv_v2f32_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 ; GFX10-NEXT: v_rcp_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 @@ -1980,7 +1925,6 @@ ; GFX11-LABEL: v_fdiv_v2f32_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 ; GFX11-NEXT: v_rcp_f32_e32 v3, v3 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -2068,7 +2012,6 @@ ; GFX10-IEEE-LABEL: v_fdiv_v2f32_arcp_ulp25: ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -2097,7 +2040,6 @@ ; GFX10-FLUSH-LABEL: v_fdiv_v2f32_arcp_ulp25: ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v2| ; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s4 ; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v3| @@ -2115,7 +2057,6 @@ ; GFX11-IEEE-LABEL: v_fdiv_v2f32_arcp_ulp25: ; GFX11-IEEE: ; %bb.0: ; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 @@ -2152,7 +2093,6 @@ ; GFX11-FLUSH-LABEL: v_fdiv_v2f32_arcp_ulp25: ; GFX11-FLUSH: ; %bb.0: ; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v2| ; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s0 @@ -2184,7 +2124,6 @@ ; GFX10-LABEL: v_fdiv_v2f32_arcp_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f32_e32 v2, v2 ; GFX10-NEXT: v_rcp_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 @@ -2194,7 +2133,6 @@ ; GFX11-LABEL: v_fdiv_v2f32_arcp_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f32_e32 v2, v2 ; GFX11-NEXT: v_rcp_f32_e32 v3, v3 ; GFX11-NEXT: s_waitcnt_depctr 0xfff diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -69,7 +69,6 @@ ; GFX10-LABEL: v_fdiv_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] @@ -86,7 +85,6 @@ ; GFX11-LABEL: v_fdiv_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -125,7 +123,6 @@ ; GFX10-LABEL: v_fdiv_f64_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] @@ -139,7 +136,6 @@ ; GFX11-LABEL: v_fdiv_f64_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -212,7 +208,6 @@ ; GFX10-LABEL: v_fdiv_f64_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] @@ -229,7 +224,6 @@ ; GFX11-LABEL: v_fdiv_f64_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -307,7 +301,6 @@ ; GFX10-LABEL: v_rcp_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] @@ -324,7 +317,6 @@ ; GFX11-LABEL: v_rcp_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -402,7 +394,6 @@ ; GFX10-LABEL: v_rcp_f64_arcp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] @@ -419,7 +410,6 @@ ; GFX11-LABEL: v_rcp_f64_arcp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -458,7 +448,6 @@ ; GFX10-LABEL: v_rcp_f64_arcp_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] ; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 ; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3] @@ -472,7 +461,6 @@ ; GFX11-LABEL: v_rcp_f64_arcp_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 @@ -546,7 +534,6 @@ ; GFX10-LABEL: v_rcp_f64_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] @@ -563,7 +550,6 @@ ; GFX11-LABEL: v_rcp_f64_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -602,7 +588,6 @@ ; GFX10-LABEL: v_fdiv_f64_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] @@ -616,7 +601,6 @@ ; GFX11-LABEL: v_fdiv_f64_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -689,7 +673,6 @@ ; GFX10-LABEL: v_fdiv_f64_arcp_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] @@ -706,7 +689,6 @@ ; GFX11-LABEL: v_fdiv_f64_arcp_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -822,7 +804,6 @@ ; GFX10-LABEL: v_fdiv_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -851,7 +832,6 @@ ; GFX11-LABEL: v_fdiv_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -914,7 +894,6 @@ ; GFX10-LABEL: v_fdiv_v2f64_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 @@ -936,7 +915,6 @@ ; GFX11-LABEL: v_fdiv_v2f64_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -1059,7 +1037,6 @@ ; GFX10-LABEL: v_fdiv_v2f64_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -1088,7 +1065,6 @@ ; GFX11-LABEL: v_fdiv_v2f64_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -1221,7 +1197,6 @@ ; GFX10-LABEL: v_rcp_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1250,7 +1225,6 @@ ; GFX11-LABEL: v_rcp_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1383,7 +1357,6 @@ ; GFX10-LABEL: v_rcp_v2f64_arcp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1412,7 +1385,6 @@ ; GFX11-LABEL: v_rcp_v2f64_arcp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1475,7 +1447,6 @@ ; GFX10-LABEL: v_rcp_v2f64_arcp_afn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[2:3] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0 @@ -1497,7 +1468,6 @@ ; GFX11-LABEL: v_rcp_v2f64_arcp_afn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] ; GFX11-NEXT: v_rcp_f64_e32 v[6:7], v[2:3] ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -1621,7 +1591,6 @@ ; GFX10-LABEL: v_rcp_v2f64_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1650,7 +1619,6 @@ ; GFX11-LABEL: v_rcp_v2f64_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 @@ -1713,7 +1681,6 @@ ; GFX10-LABEL: v_fdiv_v2f64_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 @@ -1735,7 +1702,6 @@ ; GFX11-LABEL: v_fdiv_v2f64_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -1858,7 +1824,6 @@ ; GFX10-LABEL: v_fdiv_v2f64_arcp_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -1887,7 +1852,6 @@ ; GFX11-LABEL: v_fdiv_v2f64_arcp_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] ; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] @@ -1950,7 +1914,6 @@ ; GFX10-LABEL: v_fdiv_v2f64_arcp_afn_ulp25: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 @@ -1972,7 +1935,6 @@ ; GFX11-LABEL: v_fdiv_v2f64_arcp_afn_ulp25: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] ; GFX11-NEXT: s_waitcnt_depctr 0xfff diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -180,7 +180,6 @@ ; GFX10-LABEL: store_load_vindex_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 @@ -210,7 +209,6 @@ ; GFX11-LABEL: store_load_vindex_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -244,11 +242,9 @@ ; GFX10-LABEL: private_ptr_foo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 ; GFX10-NEXT: scratch_store_dword v0, v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: private_ptr_foo: @@ -263,10 +259,8 @@ ; GFX11-LABEL: private_ptr_foo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0 ; GFX11-NEXT: scratch_store_b32 v0, v1, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1 store float 1.000000e+01, ptr addrspace(5) %gep, align 4 @@ -475,7 +469,6 @@ ; GFX10-LABEL: store_load_vindex_small_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 @@ -512,7 +505,6 @@ ; GFX11-LABEL: store_load_vindex_small_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX11-NEXT: s_add_i32 s0, s32, 0x100 @@ -744,7 +736,6 @@ ; GFX10-LABEL: store_load_vindex_large_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 @@ -782,7 +773,6 @@ ; GFX11-LABEL: store_load_vindex_large_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 @@ -900,7 +890,6 @@ ; GFX10-LABEL: store_load_large_imm_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 @@ -931,7 +920,6 @@ ; GFX11-LABEL: store_load_large_imm_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80 ; GFX11-NEXT: v_mov_b32_e32 v2, 15 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc @@ -1043,7 +1031,6 @@ ; GFX10-LABEL: store_load_i64_aligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -1065,7 +1052,6 @@ ; GFX11-LABEL: store_load_i64_aligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 15 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc @@ -1094,7 +1080,6 @@ ; GFX10-LABEL: store_load_i64_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -1116,7 +1101,6 @@ ; GFX11-LABEL: store_load_i64_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 15 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc @@ -1149,7 +1133,6 @@ ; GFX10-LABEL: store_load_v3i32_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s2, 3 ; GFX10-NEXT: s_mov_b32 s1, 2 ; GFX10-NEXT: s_mov_b32 s0, 1 @@ -1180,7 +1163,6 @@ ; GFX11-LABEL: store_load_v3i32_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, 3 ; GFX11-NEXT: s_mov_b32 s1, 2 ; GFX11-NEXT: s_mov_b32 s0, 1 @@ -1218,7 +1200,6 @@ ; GFX10-LABEL: store_load_v4i32_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s3, 4 ; GFX10-NEXT: s_mov_b32 s2, 3 ; GFX10-NEXT: s_mov_b32 s1, 2 @@ -1251,7 +1232,6 @@ ; GFX11-LABEL: store_load_v4i32_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s3, 4 ; GFX11-NEXT: s_mov_b32 s2, 3 ; GFX11-NEXT: s_mov_b32 s1, 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -27,14 +27,12 @@ ; GFX10-LABEL: v_fma_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.fma.f32(float %x, float %y, float %z) @@ -66,7 +64,6 @@ ; GFX10-LABEL: v_fma_v2f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4 ; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -74,7 +71,6 @@ ; GFX11-LABEL: v_fma_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, v0, v2, v4 ; GFX11-NEXT: v_fma_f32 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -108,14 +104,12 @@ ; GFX10-LABEL: v_fma_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fma.f16(half %x, half %y, half %z) @@ -148,14 +142,12 @@ ; GFX10-LABEL: v_fma_f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f16_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg half %x @@ -189,14 +181,12 @@ ; GFX10-LABEL: v_fma_f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f16_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.y = fneg half %y @@ -230,14 +220,12 @@ ; GFX10-LABEL: v_fma_f16_fneg_add: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f16_fneg_add: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg half %z @@ -282,14 +270,12 @@ ; GFX10-LABEL: v_fma_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) @@ -339,14 +325,12 @@ ; GFX10-LABEL: v_fma_v2f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_v2f16_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX11-NEXT: s_setpc_b64 s[30:31] %x.fneg = fneg <2 x half> %x @@ -397,14 +381,12 @@ ; GFX10-LABEL: v_fma_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_v2f16_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0] ; GFX11-NEXT: s_setpc_b64 s[30:31] %y.fneg = fneg <2 x half> %y @@ -449,14 +431,12 @@ ; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %x.fneg = fneg <2 x half> %x @@ -509,7 +489,6 @@ ; GFX10-LABEL: v_fma_v3f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -517,7 +496,6 @@ ; GFX11-LABEL: v_fma_v3f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -580,7 +558,6 @@ ; GFX10-LABEL: v_fma_v4f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -588,7 +565,6 @@ ; GFX11-LABEL: v_fma_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -618,14 +594,12 @@ ; GFX10-LABEL: v_fma_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call double @llvm.fma.f64(double %x, double %y, double %z) @@ -654,14 +628,12 @@ ; GFX10-LABEL: v_fma_f64_fneg_all: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f64_fneg_all: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg double %x @@ -696,7 +668,6 @@ ; GFX10-LABEL: v_fma_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9] ; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -704,7 +675,6 @@ ; GFX11-LABEL: v_fma_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9] ; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11] ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -734,14 +704,12 @@ ; GFX10-LABEL: v_fma_f32_fabs_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fabs_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -771,14 +739,12 @@ ; GFX10-LABEL: v_fma_f32_fabs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, |v1|, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fabs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, v0, |v1|, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) @@ -808,14 +774,12 @@ ; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, |v0|, |v1|, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -941,14 +905,12 @@ ; GFX10-LABEL: v_fma_f32_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, -v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, -v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg float %x @@ -978,14 +940,12 @@ ; GFX10-LABEL: v_fma_f32_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, v0, -v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.y = fneg float %y @@ -1015,14 +975,12 @@ ; GFX10-LABEL: v_fma_f32_fneg_z: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_f32_fneg_z: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f32 v0, v0, v1, -v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg float %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll @@ -6,7 +6,6 @@ ; GFX10-LABEL: test_min_max_ValK0_K1_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -24,7 +23,6 @@ ; GFX10-LABEL: test_min_max_K0Val_K1_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -44,7 +42,6 @@ ; GFX10-LABEL: test_min_K1max_ValK0_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -65,7 +62,6 @@ ; GFX10-LABEL: test_min_K1max_K0Val_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -85,7 +81,6 @@ ; GFX10-LABEL: test_max_min_ValK1_K0_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -103,7 +98,6 @@ ; GFX10-LABEL: test_max_min_K1Val_K0_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -121,7 +115,6 @@ ; GFX10-LABEL: test_max_K0min_ValK1_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -140,7 +133,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -161,7 +153,6 @@ ; GFX10-LABEL: test_min_max_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -179,7 +170,6 @@ ; GFX10-LABEL: test_max_min_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -202,7 +192,6 @@ ; GFX10-LABEL: test_min_max_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -223,7 +212,6 @@ ; GFX10-LABEL: test_max_min_K0_gt_K1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -244,7 +232,6 @@ ; GFX10-LABEL: test_min_max_non_inline_const: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -266,7 +253,6 @@ ; GFX10-LABEL: test_min_max_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -286,7 +272,6 @@ ; GFX10-LABEL: test_min_max_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -314,7 +299,6 @@ ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -336,7 +320,6 @@ ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -357,7 +340,6 @@ ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 ; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll @@ -22,7 +22,6 @@ ; GFX10-LABEL: v_fmul_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = fmul <2 x half> %a, %b @@ -48,7 +47,6 @@ ; GFX10-LABEL: v_fmul_v2f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -75,7 +73,6 @@ ; GFX10-LABEL: v_fmul_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.b = fneg <2 x half> %b @@ -101,7 +98,6 @@ ; GFX10-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -130,7 +126,6 @@ ; GFX10-LABEL: v_fmul_v3f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -161,7 +156,6 @@ ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -193,7 +187,6 @@ ; GFX10-LABEL: v_fmul_v3f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -222,7 +215,6 @@ ; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -254,7 +246,6 @@ ; GFX10-LABEL: v_fmul_v4f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -286,7 +277,6 @@ ; GFX10-LABEL: v_fmul_v4f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -319,7 +309,6 @@ ; GFX10-LABEL: v_fmul_v4f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -350,7 +339,6 @@ ; GFX10-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -386,7 +374,6 @@ ; GFX10-LABEL: v_fmul_v6f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 @@ -424,7 +411,6 @@ ; GFX10-LABEL: v_fmul_v6f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0] @@ -463,7 +449,6 @@ ; GFX10-LABEL: v_fmul_v6f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] @@ -499,7 +484,6 @@ ; GFX10-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 @@ -540,7 +524,6 @@ ; GFX10-LABEL: v_fmul_v8f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 @@ -584,7 +567,6 @@ ; GFX10-LABEL: v_fmul_v8f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0] @@ -629,7 +611,6 @@ ; GFX10-LABEL: v_fmul_v8f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] @@ -670,7 +651,6 @@ ; GFX10-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 ; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 ; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -33,7 +33,6 @@ ; GFX10-LABEL: v_pow_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -42,7 +41,6 @@ ; GFX11-LABEL: v_pow_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 @@ -90,7 +88,6 @@ ; GFX10-LABEL: v_pow_v2f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_log_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 @@ -102,7 +99,6 @@ ; GFX11-LABEL: v_pow_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_log_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -152,7 +148,6 @@ ; GFX10-LABEL: v_pow_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f16_e32 v0, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -164,7 +159,6 @@ ; GFX11-LABEL: v_pow_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f16_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -236,7 +230,6 @@ ; GFX10-LABEL: v_pow_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f16_e32 v2, v0 ; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 @@ -256,7 +249,6 @@ ; GFX11-LABEL: v_pow_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f16_e32 v2, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 @@ -347,7 +339,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -368,7 +359,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -462,7 +452,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f16_e32 v2, v0 ; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -483,7 +472,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f16_e32 v2, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 @@ -583,7 +571,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX10-NEXT: v_log_f16_e32 v2, v0 @@ -605,7 +592,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -671,7 +657,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e64 v0, |v0| ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -680,7 +665,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e64 v0, |v0| ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 @@ -720,7 +704,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -729,7 +712,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1| @@ -769,7 +751,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e64 v0, |v0| ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -778,7 +759,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e64 v0, |v0| ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1| @@ -942,7 +922,6 @@ ; GFX10-LABEL: v_pow_f32_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e64 v0, -v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -951,7 +930,6 @@ ; GFX11-LABEL: v_pow_f32_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e64 v0, -v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 @@ -991,7 +969,6 @@ ; GFX10-LABEL: v_pow_f32_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -1000,7 +977,6 @@ ; GFX11-LABEL: v_pow_f32_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, -v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -266,7 +266,6 @@ ; GFX10-LABEL: v_fshl_i7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 @@ -297,7 +296,6 @@ ; GFX11-LABEL: v_fshl_i7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 @@ -443,7 +441,6 @@ ; GFX10-LABEL: v_fshl_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v3, v2 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 @@ -457,7 +454,6 @@ ; GFX11-LABEL: v_fshl_i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v3, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 @@ -552,7 +548,6 @@ ; GFX10-LABEL: v_fshl_i8_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 @@ -562,7 +557,6 @@ ; GFX11-LABEL: v_fshl_i8_4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -651,7 +645,6 @@ ; GFX10-LABEL: v_fshl_i8_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1 @@ -661,7 +654,6 @@ ; GFX11-LABEL: v_fshl_i8_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -894,7 +886,6 @@ ; GFX10-LABEL: v_fshl_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0 @@ -922,7 +913,6 @@ ; GFX11-LABEL: v_fshl_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v0 @@ -1383,7 +1373,6 @@ ; GFX10-LABEL: v_fshl_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 @@ -1437,7 +1426,6 @@ ; GFX11-LABEL: v_fshl_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0 @@ -1751,7 +1739,6 @@ ; GFX10-LABEL: v_fshl_i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX10-NEXT: v_bfe_u32 v1, v1, 1, 23 @@ -1780,7 +1767,6 @@ ; GFX11-LABEL: v_fshl_i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX11-NEXT: v_bfe_u32 v1, v1, 1, 23 @@ -2627,7 +2613,6 @@ ; GFX10-LABEL: v_fshl_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 @@ -2679,7 +2664,6 @@ ; GFX11-LABEL: v_fshl_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 @@ -2879,47 +2863,18 @@ } define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) { -; GFX6-LABEL: v_fshl_i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v1, v0, v1, 1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: v_not_b32_e32 v2, v2 -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshl_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v1, v0, v1, 1 -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0 -; GFX8-NEXT: v_not_b32_e32 v2, v2 -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshl_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v1, v0, v1, 1 -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_not_b32_e32 v2, v2 -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshl_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v1, v0, v1, 1 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 -; GFX10-NEXT: v_not_b32_e32 v2, v2 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshl_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v1, v0, v1, 1 +; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0 +; GCN-NEXT: v_not_b32_e32 v2, v2 +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v1, v0, v1, 1 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_not_b32_e32 v2, v2 @@ -2931,35 +2886,15 @@ } define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) { -; GFX6-LABEL: v_fshl_i32_5: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -5 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshl_i32_5: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -5 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshl_i32_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -5 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshl_i32_5: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -5 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshl_i32_5: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -5 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i32_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -5 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5) @@ -2967,35 +2902,15 @@ } define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) { -; GFX6-LABEL: v_fshl_i32_8: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -8 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshl_i32_8: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -8 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshl_i32_8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -8 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshl_i32_8: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -8 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshl_i32_8: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -8 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i32_8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -8 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8) @@ -3195,7 +3110,6 @@ ; GFX10-LABEL: v_fshl_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_not_b32_e32 v4, v4 @@ -3209,7 +3123,6 @@ ; GFX11-LABEL: v_fshl_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v2, v0, v2, 1 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_not_b32_e32 v4, v4 @@ -3279,7 +3192,6 @@ ; GFX10-LABEL: v_fshl_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_not_b32_e32 v6, v6 @@ -3297,7 +3209,6 @@ ; GFX11-LABEL: v_fshl_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v3, v0, v3, 1 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_not_b32_e32 v6, v6 @@ -3383,7 +3294,6 @@ ; GFX10-LABEL: v_fshl_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_not_b32_e32 v8, v8 @@ -3405,7 +3315,6 @@ ; GFX11-LABEL: v_fshl_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v4, v0, v4, 1 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_not_b32_e32 v8, v8 @@ -3627,7 +3536,6 @@ ; GFX10-LABEL: v_fshl_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 @@ -3640,7 +3548,6 @@ ; GFX11-LABEL: v_fshl_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1 @@ -3683,7 +3590,6 @@ ; GFX10-LABEL: v_fshl_i16_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -3692,7 +3598,6 @@ ; GFX11-LABEL: v_fshl_i16_4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX11-NEXT: v_lshrrev_b16 v1, 12, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3730,7 +3635,6 @@ ; GFX10-LABEL: v_fshl_i16_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -3739,7 +3643,6 @@ ; GFX11-LABEL: v_fshl_i16_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0 ; GFX11-NEXT: v_lshrrev_b16 v1, 11, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4138,7 +4041,6 @@ ; GFX10-LABEL: v_fshl_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1] @@ -4151,7 +4053,6 @@ ; GFX11-LABEL: v_fshl_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1] @@ -4209,7 +4110,6 @@ ; GFX10-LABEL: v_fshl_v2i16_4_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -4218,7 +4118,6 @@ ; GFX11-LABEL: v_fshl_v2i16_4_8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4845,7 +4744,6 @@ ; GFX10-LABEL: v_fshl_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -4865,7 +4763,6 @@ ; GFX11-LABEL: v_fshl_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5222,7 +5119,6 @@ ; GFX10-LABEL: v_fshl_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5242,7 +5138,6 @@ ; GFX11-LABEL: v_fshl_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5398,7 +5293,6 @@ ; GFX10-LABEL: v_fshl_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v5, v4 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 @@ -5412,7 +5306,6 @@ ; GFX11-LABEL: v_fshl_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v5, v4 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] ; GFX11-NEXT: v_and_b32_e32 v4, 63, v4 @@ -5457,7 +5350,6 @@ ; GFX10-LABEL: v_fshl_i64_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 @@ -5466,7 +5358,6 @@ ; GFX11-LABEL: v_fshl_i64_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 27, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5477,39 +5368,16 @@ } define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) { -; GFX6-LABEL: v_fshl_i64_32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, v0 -; GFX6-NEXT: v_mov_b32_e32 v0, v3 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshl_i64_32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, v3 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshl_i64_32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshl_i64_32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshl_i64_32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, v3 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i64_32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32) @@ -5546,7 +5414,6 @@ ; GFX10-LABEL: v_fshl_i64_48: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1 @@ -5555,7 +5422,6 @@ ; GFX11-LABEL: v_fshl_i64_48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5909,7 +5775,6 @@ ; GFX10-LABEL: v_fshl_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v9, v8 ; GFX10-NEXT: v_not_b32_e32 v11, v10 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] @@ -5931,7 +5796,6 @@ ; GFX11-LABEL: v_fshl_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v9, v8 ; GFX11-NEXT: v_not_b32_e32 v11, v10 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] @@ -6344,7 +6208,6 @@ ; GFX10-LABEL: v_fshl_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8 ; GFX10-NEXT: v_not_b32_e32 v8, v8 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] @@ -6392,7 +6255,6 @@ ; GFX11-LABEL: v_fshl_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8 ; GFX11-NEXT: v_not_b32_e32 v8, v8 @@ -7364,7 +7226,6 @@ ; GFX10-LABEL: v_fshl_i128_65: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5 @@ -7376,7 +7237,6 @@ ; GFX11-LABEL: v_fshl_i128_65: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5 @@ -8114,7 +7974,6 @@ ; GFX10-LABEL: v_fshl_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16 ; GFX10-NEXT: v_not_b32_e32 v16, v16 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] @@ -8204,7 +8063,6 @@ ; GFX11-LABEL: v_fshl_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16 ; GFX11-NEXT: v_not_b32_e32 v16, v16 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -263,7 +263,6 @@ ; GFX10-LABEL: v_fshr_i7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 @@ -294,7 +293,6 @@ ; GFX11-LABEL: v_fshr_i7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 @@ -440,7 +438,6 @@ ; GFX10-LABEL: v_fshr_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v3, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 @@ -454,7 +451,6 @@ ; GFX11-LABEL: v_fshr_i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v3, v2 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 @@ -548,7 +544,6 @@ ; GFX10-LABEL: v_fshr_i8_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 @@ -558,7 +553,6 @@ ; GFX11-LABEL: v_fshr_i8_4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -647,7 +641,6 @@ ; GFX10-LABEL: v_fshr_i8_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 @@ -657,7 +650,6 @@ ; GFX11-LABEL: v_fshr_i8_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -890,7 +882,6 @@ ; GFX10-LABEL: v_fshr_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 @@ -918,7 +909,6 @@ ; GFX11-LABEL: v_fshr_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 @@ -1381,7 +1371,6 @@ ; GFX10-LABEL: v_fshr_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX10-NEXT: v_not_b32_e32 v8, v2 @@ -1435,7 +1424,6 @@ ; GFX11-LABEL: v_fshr_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0 @@ -1757,7 +1745,6 @@ ; GFX10-LABEL: v_fshr_i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 @@ -1787,7 +1774,6 @@ ; GFX11-LABEL: v_fshr_i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 @@ -2643,7 +2629,6 @@ ; GFX10-LABEL: v_fshr_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 @@ -2697,7 +2682,6 @@ ; GFX11-LABEL: v_fshr_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 @@ -2885,35 +2869,15 @@ } define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) { -; GFX6-LABEL: v_fshr_i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) @@ -2921,35 +2885,15 @@ } define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) { -; GFX6-LABEL: v_fshr_i32_5: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_i32_5: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_i32_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_i32_5: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 5 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_i32_5: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 5 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i32_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) @@ -2957,35 +2901,15 @@ } define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) { -; GFX6-LABEL: v_fshr_i32_8: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_i32_8: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_i32_8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_i32_8: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_i32_8: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 8 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i32_8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) @@ -3098,39 +3022,16 @@ } define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { -; GFX6-LABEL: v_fshr_v2i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 -; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_v2i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 -; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 -; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_v2i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 -; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GCN-NEXT: v_alignbit_b32 v1, v1, v3, v5 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -3139,43 +3040,17 @@ } define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { -; GFX6-LABEL: v_fshr_v3i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 -; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7 -; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_v3i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 -; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7 -; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_v3i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 -; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7 -; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_v3i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 -; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 -; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_v3i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v3, v6 +; GCN-NEXT: v_alignbit_b32 v1, v1, v4, v7 +; GCN-NEXT: v_alignbit_b32 v2, v2, v5, v8 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8 @@ -3185,47 +3060,18 @@ } define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { -; GFX6-LABEL: v_fshr_v4i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 -; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9 -; GFX6-NEXT: v_alignbit_b32 v2, v2, v6, v10 -; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_v4i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 -; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9 -; GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10 -; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_v4i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 -; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9 -; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10 -; GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_v4i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 -; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 -; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 -; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_v4i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_alignbit_b32 v0, v0, v4, v8 +; GCN-NEXT: v_alignbit_b32 v1, v1, v5, v9 +; GCN-NEXT: v_alignbit_b32 v2, v2, v6, v10 +; GCN-NEXT: v_alignbit_b32 v3, v3, v7, v11 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10 @@ -3436,7 +3282,6 @@ ; GFX10-LABEL: v_fshr_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 @@ -3449,7 +3294,6 @@ ; GFX11-LABEL: v_fshr_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2 @@ -3492,7 +3336,6 @@ ; GFX10-LABEL: v_fshr_i16_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -3501,7 +3344,6 @@ ; GFX11-LABEL: v_fshr_i16_4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3539,7 +3381,6 @@ ; GFX10-LABEL: v_fshr_i16_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -3548,7 +3389,6 @@ ; GFX11-LABEL: v_fshr_i16_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0 ; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3992,7 +3832,6 @@ ; GFX10-LABEL: v_fshr_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 @@ -4005,7 +3844,6 @@ ; GFX11-LABEL: v_fshr_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 @@ -4063,7 +3901,6 @@ ; GFX10-LABEL: v_fshr_v2i16_4_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 @@ -4072,7 +3909,6 @@ ; GFX11-LABEL: v_fshr_v2i16_4_8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4842,7 +4678,6 @@ ; GFX10-LABEL: v_fshr_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -4862,7 +4697,6 @@ ; GFX11-LABEL: v_fshr_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5314,7 +5148,6 @@ ; GFX10-LABEL: v_fshr_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5334,7 +5167,6 @@ ; GFX11-LABEL: v_fshr_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 @@ -5520,7 +5352,6 @@ ; GFX10-LABEL: v_fshr_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v5, v4 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 @@ -5534,7 +5365,6 @@ ; GFX11-LABEL: v_fshr_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v5, v4 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX11-NEXT: v_and_b32_e32 v4, 63, v4 @@ -5581,7 +5411,6 @@ ; GFX10-LABEL: v_fshr_i64_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1 @@ -5590,7 +5419,6 @@ ; GFX11-LABEL: v_fshr_i64_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5601,39 +5429,16 @@ } define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { -; GFX6-LABEL: v_fshr_i64_32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, v0 -; GFX6-NEXT: v_mov_b32_e32 v0, v3 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_fshr_i64_32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, v3 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_fshr_i64_32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_fshr_i64_32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_fshr_i64_32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, v3 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i64_32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) @@ -5666,7 +5471,6 @@ ; GFX10-LABEL: v_fshr_i64_48: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -5674,7 +5478,6 @@ ; GFX11-LABEL: v_fshr_i64_48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6028,7 +5831,6 @@ ; GFX10-LABEL: v_fshr_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v9, v8 ; GFX10-NEXT: v_not_b32_e32 v11, v10 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] @@ -6050,7 +5852,6 @@ ; GFX11-LABEL: v_fshr_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v9, v8 ; GFX11-NEXT: v_not_b32_e32 v11, v10 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] @@ -6463,7 +6264,6 @@ ; GFX10-LABEL: v_fshr_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v9, v8 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1 @@ -6512,7 +6312,6 @@ ; GFX11-LABEL: v_fshr_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v9, v8 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] @@ -7486,7 +7285,6 @@ ; GFX10-LABEL: v_fshr_i128_65: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] @@ -7498,7 +7296,6 @@ ; GFX11-LABEL: v_fshr_i128_65: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v8, v2 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] @@ -8238,7 +8035,6 @@ ; GFX10-LABEL: v_fshr_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_not_b32_e32 v17, v16 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16 @@ -8330,7 +8126,6 @@ ; GFX11-LABEL: v_fshr_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_not_b32_e32 v17, v16 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -11,7 +11,6 @@ ; CHECK-LABEL: waterfall_loop: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 ; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 @@ -162,7 +161,6 @@ ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: %ret = tail call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %vgpr_srd, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -117,7 +117,6 @@ ; GFX10-LABEL: dyn_insertelement_v8f32_const_s_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 @@ -141,7 +140,6 @@ ; GFX11-LABEL: dyn_insertelement_v8f32_const_s_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 @@ -754,7 +752,6 @@ ; GFX10-LABEL: dyn_insertelement_v8f64_const_s_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s18, 0 ; GFX10-NEXT: s_mov_b64 s[4:5], 1.0 ; GFX10-NEXT: s_mov_b32 s19, 0x40200000 @@ -821,7 +818,6 @@ ; GFX11-LABEL: dyn_insertelement_v8f64_const_s_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s14, 0 ; GFX11-NEXT: s_mov_b32 s15, 0x40200000 ; GFX11-NEXT: s_mov_b64 s[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -28,7 +28,6 @@ ; GFX10_W32-LABEL: v_div_fmas_f32: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_W32-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 @@ -37,7 +36,6 @@ ; GFX10_W64-LABEL: v_div_fmas_f32: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_W64-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 @@ -46,7 +44,6 @@ ; GFX11_W32-LABEL: v_div_fmas_f32: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11_W32-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 @@ -55,7 +52,6 @@ ; GFX11_W64-LABEL: v_div_fmas_f32: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11_W64-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 @@ -86,7 +82,6 @@ ; GFX10_W32-LABEL: v_div_fmas_f64: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_W32-NEXT: v_and_b32_e32 v6, 1, v6 ; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -95,7 +90,6 @@ ; GFX10_W64-LABEL: v_div_fmas_f64: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_W64-NEXT: v_and_b32_e32 v6, 1, v6 ; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -104,7 +98,6 @@ ; GFX11_W32-LABEL: v_div_fmas_f64: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11_W32-NEXT: v_and_b32_e32 v6, 1, v6 ; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] @@ -113,7 +106,6 @@ ; GFX11_W64-LABEL: v_div_fmas_f64: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11_W64-NEXT: v_and_b32_e32 v6, 1, v6 ; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll @@ -161,7 +161,6 @@ ; GFX10PLUS-LABEL: ds_fadd_f32_vv: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -188,7 +187,6 @@ ; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -216,7 +214,6 @@ ; GFX10PLUS-LABEL: ds_fadd_f32_vv_nortn: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -243,7 +240,6 @@ ; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset_nortn: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -271,7 +267,6 @@ ; GFX10PLUS-LABEL: ds_fadd_f32_vv_volatile: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll @@ -161,7 +161,6 @@ ; GFX10PLUS-LABEL: ds_fmin_f32_vv: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -188,7 +187,6 @@ ; GFX10PLUS-LABEL: ds_fmin_f32_vv_offset: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -216,7 +214,6 @@ ; GFX10PLUS-LABEL: ds_fmin_f32_vv_nortn: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -243,7 +240,6 @@ ; GFX10PLUS-LABEL: ds_fmin_f32_vv_offset_nortn: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -271,7 +267,6 @@ ; GFX10PLUS-LABEL: ds_fmin_f32_vv_volatile: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll @@ -24,7 +24,6 @@ ; GFX10PLUS-LABEL: v_fdot2_clamp: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -28,21 +28,18 @@ ; GFX101-LABEL: v_mul_legacy_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) @@ -71,21 +68,18 @@ ; GFX101-LABEL: v_mul_legacy_undef0_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_undef0_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_undef0_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) @@ -114,21 +108,18 @@ ; GFX101-LABEL: v_mul_legacy_undef1_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_undef1_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_undef1_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) @@ -157,21 +148,18 @@ ; GFX101-LABEL: v_mul_legacy_undef_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_undef_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s4, s4 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_undef_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, s0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef) @@ -200,21 +188,18 @@ ; GFX101-LABEL: v_mul_legacy_fabs_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_fabs_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1| ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_fabs_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v0|, |v1| ; GFX11-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) @@ -245,21 +230,18 @@ ; GFX101-LABEL: v_mul_legacy_fneg_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_fneg_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_fneg_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, -v0, -v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a @@ -294,7 +276,6 @@ ; GFX101-LABEL: v_add_mul_legacy_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX101-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX101-NEXT: s_setpc_b64 s[30:31] @@ -302,7 +283,6 @@ ; GFX103-LABEL: v_add_mul_legacy_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX103-NEXT: s_setpc_b64 s[30:31] @@ -310,7 +290,6 @@ ; GFX11-LABEL: v_add_mul_legacy_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 @@ -342,14 +321,12 @@ ; GFX101-LABEL: v_mad_legacy_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mad_legacy_f32 v0, v0, v1, v2 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mad_legacy_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX103-NEXT: s_setpc_b64 s[30:31] @@ -357,7 +334,6 @@ ; GFX11-LABEL: v_mad_legacy_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 @@ -389,14 +365,12 @@ ; GFX101-LABEL: v_mad_legacy_fneg_f32: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mad_legacy_fneg_f32: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1 ; GFX103-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX103-NEXT: s_setpc_b64 s[30:31] @@ -404,7 +378,6 @@ ; GFX11-LABEL: v_mad_legacy_fneg_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, -v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 @@ -475,21 +448,18 @@ ; GFX101-LABEL: v_mul_legacy_f32_1.0: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_f32_1.0: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_f32_1.0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 1.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 1.0) @@ -518,21 +488,18 @@ ; GFX101-LABEL: v_mul_legacy_f32_1.0_swap: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_f32_1.0_swap: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_f32_1.0_swap: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 1.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float 1.0, float %b) @@ -561,21 +528,18 @@ ; GFX101-LABEL: v_mul_legacy_f32_2.0: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_f32_2.0: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_f32_2.0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 2.0) @@ -604,21 +568,18 @@ ; GFX101-LABEL: v_mul_legacy_f32_2.0_swap: ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mul_legacy_f32_2.0_swap: ; GFX103: ; %bb.0: ; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX103-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0 ; GFX103-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mul_legacy_f32_2.0_swap: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.amdgcn.fmul.legacy(float 2.0, float %b) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -7,7 +7,6 @@ ; GFX10-LABEL: global_atomic_csub: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -15,7 +14,6 @@ ; GFX11-LABEL: global_atomic_csub: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -27,7 +25,6 @@ ; GFX10-LABEL: global_atomic_csub_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000 ; GFX10-NEXT: v_mov_b32_e32 v3, s4 ; GFX10-NEXT: v_mov_b32_e32 v4, s5 @@ -40,7 +37,6 @@ ; GFX11-LABEL: global_atomic_csub_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 @@ -59,7 +55,6 @@ ; GFX10-LABEL: global_atomic_csub_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -67,7 +62,6 @@ ; GFX11-LABEL: global_atomic_csub_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -79,7 +73,6 @@ ; GFX10-LABEL: global_atomic_csub_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000 ; GFX10-NEXT: v_mov_b32_e32 v3, s4 ; GFX10-NEXT: v_mov_b32_e32 v4, s5 @@ -92,7 +85,6 @@ ; GFX11-LABEL: global_atomic_csub_offset_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll @@ -88,7 +88,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2d_v4f32_xyzw_tfe: @@ -114,7 +113,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 @@ -176,7 +174,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2d_v4f32_xyzw_tfe_lwe: @@ -202,7 +199,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll @@ -101,7 +101,6 @@ ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v5, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: @@ -131,7 +130,6 @@ ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 @@ -198,7 +196,6 @@ ; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v5, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: @@ -228,7 +225,6 @@ ; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v5, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll @@ -92,7 +92,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v9, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: @@ -119,7 +118,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 @@ -185,7 +183,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v9, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: @@ -212,7 +209,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll @@ -95,7 +95,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_3d_v4f32_xyzw_tfe: @@ -121,7 +120,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 @@ -186,7 +184,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe: @@ -212,7 +209,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll @@ -90,7 +90,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_3d_v4f32_xyzw_tfe: @@ -116,7 +115,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 @@ -180,7 +178,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[10:11] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe: @@ -206,7 +203,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[10:11] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll @@ -1456,7 +1456,6 @@ ; GFX10-LABEL: test_setreg_roundingmode_var_vgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND @@ -1466,7 +1465,6 @@ ; GFX11-LABEL: test_setreg_roundingmode_var_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; encoding: [0x00,0x05,0x00,0x7e] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll @@ -21,7 +21,6 @@ ; GFX10-LABEL: v_sdot2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false) @@ -44,7 +43,6 @@ ; GFX10-LABEL: v_sdot2_clamp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 true) @@ -93,7 +91,6 @@ ; GFX10-LABEL: v_sdot2_inline_literal_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> , <2 x i16> %b, i32 %c, i1 false) @@ -117,7 +114,6 @@ ; GFX10-LABEL: v_sdot2_inline_literal_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> , i32 %c, i1 false) @@ -142,7 +138,6 @@ ; GFX10-LABEL: v_sdot2_inline_literal_a_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> , <2 x i16> , i32 %c, i1 false) @@ -167,7 +162,6 @@ ; GFX10-LABEL: v_sdot2_inline_literal_a_b_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> , <2 x i16> , i32 8, i1 false) @@ -192,7 +186,6 @@ ; GFX10-LABEL: v_sdot2_inline_literal_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, 7 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 7, i1 false) @@ -217,7 +210,6 @@ ; GFX10-LABEL: v_sdot2_fneg_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -244,7 +236,6 @@ ; GFX10-LABEL: v_sdot2_fneg_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.b = fneg <2 x half> %b @@ -272,7 +263,6 @@ ; GFX10-LABEL: v_sdot2_fnegf32_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -301,7 +291,6 @@ ; GFX10-LABEL: v_sdot2_fnegv2f16_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -330,7 +319,6 @@ ; GFX10-LABEL: v_sdot2_shuffle10_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -358,7 +346,6 @@ ; GFX10-LABEL: v_sdot2_shuffle10_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: v_sdot4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -31,7 +30,6 @@ ; GFX10-LABEL: v_sdot4_clamp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 true) @@ -65,7 +63,6 @@ ; GFX10-LABEL: v_sdot4_cast_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v1 @@ -101,7 +98,6 @@ ; GFX10-LABEL: v_sdot4_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -123,7 +119,6 @@ ; GFX10-LABEL: v_sdot4_fnegv2f16_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: v_sdot8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 false) @@ -30,7 +29,6 @@ ; GFX10-LABEL: v_sdot8_clamp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 true) @@ -56,7 +54,6 @@ ; GFX10-LABEL: v_sdot8_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -77,7 +74,6 @@ ; GFX10-LABEL: v_sdot8_fnegv2f16_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll @@ -7,7 +7,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -19,7 +18,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_us: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -31,7 +29,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_su: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -43,7 +40,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -57,7 +53,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -69,7 +64,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_us_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -81,7 +75,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_su_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -93,7 +86,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll @@ -7,7 +7,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -19,7 +18,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_us: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -31,7 +29,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_su: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -43,7 +40,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -57,7 +53,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -69,7 +64,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_us_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -81,7 +75,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_su_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -93,7 +86,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll @@ -15,7 +15,6 @@ ; GFX10PLUS-LABEL: v_trig_preop_f64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) @@ -32,7 +31,6 @@ ; GFX10PLUS-LABEL: v_trig_preop_f64_imm: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll @@ -20,7 +20,6 @@ ; GFX10-LABEL: v_udot2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false) @@ -43,7 +42,6 @@ ; GFX10-LABEL: v_udot2_clamp: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 true) @@ -91,7 +89,6 @@ ; GFX10-LABEL: v_udot2_inline_literal_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, 4, v0, v1 op_sel_hi:[0,1,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> , <2 x i16> %b, i32 %c, i1 false) @@ -114,7 +111,6 @@ ; GFX10-LABEL: v_udot2_inline_literal_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, 4, v1 op_sel_hi:[1,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> , i32 %c, i1 false) @@ -137,7 +133,6 @@ ; GFX10-LABEL: v_udot2_inline_literal_a_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, v1 op_sel_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> , <2 x i16> , i32 %c, i1 false) @@ -160,7 +155,6 @@ ; GFX10-LABEL: v_udot2_inline_literal_a_b_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, 8 op_sel_hi:[0,0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> , <2 x i16> , i32 8, i1 false) @@ -183,7 +177,6 @@ ; GFX10-LABEL: v_udot2_inline_literal_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, 7 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 7, i1 false) @@ -206,7 +199,6 @@ ; GFX10-LABEL: v_udot2_fneg_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -231,7 +223,6 @@ ; GFX10-LABEL: v_udot2_fneg_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.b = fneg <2 x half> %b @@ -258,7 +249,6 @@ ; GFX10-LABEL: v_udot2_fnegf32_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -286,7 +276,6 @@ ; GFX10-LABEL: v_udot2_fnegv2f16_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -314,7 +303,6 @@ ; GFX10-LABEL: v_udot2_shuffle10_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -341,7 +329,6 @@ ; GFX10-LABEL: v_udot2_shuffle10_b: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll @@ -14,7 +14,6 @@ ; GFX10PLUS-LABEL: v_udot4: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 false) @@ -31,7 +30,6 @@ ; GFX10PLUS-LABEL: v_udot4_clamp: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 true) @@ -65,7 +63,6 @@ ; GFX10-LABEL: v_udot4_cast_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v1 @@ -87,7 +84,6 @@ ; GFX11-LABEL: v_udot4_cast_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 @@ -123,7 +119,6 @@ ; GFX10PLUS-LABEL: v_udot4_fnegf32_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -144,7 +139,6 @@ ; GFX10PLUS-LABEL: v_udot4_fnegv2f16_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll @@ -14,7 +14,6 @@ ; GFX10PLUS-LABEL: v_udot8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 false) @@ -31,7 +30,6 @@ ; GFX10PLUS-LABEL: v_udot8_clamp: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %r = call i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 true) @@ -57,7 +55,6 @@ ; GFX10PLUS-LABEL: v_udot8_fnegf32_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -78,7 +75,6 @@ ; GFX10PLUS-LABEL: v_udot8_fnegv2f16_a: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll @@ -31,7 +31,6 @@ ; GFX11-LABEL: v_powi_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f16_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -63,7 +62,6 @@ ; GFX11-LABEL: v_powi_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -85,7 +83,6 @@ ; GFX11-LABEL: v_powi_0_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 0) @@ -101,7 +98,6 @@ ; GFX11-LABEL: v_powi_1_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 1) ret float %res @@ -119,7 +115,6 @@ ; GFX11-LABEL: v_powi_neg1_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -1.0, v0 @@ -142,7 +137,6 @@ ; GFX11-LABEL: v_powi_2_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0 @@ -165,7 +159,6 @@ ; GFX11-LABEL: v_powi_neg2_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -2.0, v0 @@ -188,7 +181,6 @@ ; GFX11-LABEL: v_powi_4_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 4.0, v0 @@ -211,7 +203,6 @@ ; GFX11-LABEL: v_powi_8_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41000000, v0 @@ -234,7 +225,6 @@ ; GFX11-LABEL: v_powi_16_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41800000, v0 @@ -257,7 +247,6 @@ ; GFX11-LABEL: v_powi_128_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x43000000, v0 @@ -280,7 +269,6 @@ ; GFX11-LABEL: v_powi_neg128_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0xc3000000, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll @@ -26,7 +26,6 @@ ; GFX10-LABEL: load_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b128 v[0:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -34,7 +33,6 @@ ; GFX11-LABEL: load_lds_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -148,7 +146,6 @@ ; GFX10-LABEL: load_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u8 v1, v0 ; GFX10-NEXT: ds_read_u8 v2, v0 offset:1 ; GFX10-NEXT: ds_read_u8 v3, v0 offset:2 @@ -198,7 +195,6 @@ ; GFX11-LABEL: load_lds_v4i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u8 v1, v0 ; GFX11-NEXT: ds_load_u8 v2, v0 offset:1 ; GFX11-NEXT: ds_load_u8 v3, v0 offset:2 @@ -299,7 +295,6 @@ ; GFX10-LABEL: load_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v1, v0 ; GFX10-NEXT: ds_read_u16 v2, v0 offset:2 ; GFX10-NEXT: ds_read_u16 v3, v0 offset:4 @@ -321,7 +316,6 @@ ; GFX11-LABEL: load_lds_v4i32_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v1, v0 ; GFX11-NEXT: ds_load_u16 v2, v0 offset:2 ; GFX11-NEXT: ds_load_u16 v3, v0 offset:4 @@ -366,7 +360,6 @@ ; GFX10-LABEL: load_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 @@ -376,7 +369,6 @@ ; GFX11-LABEL: load_lds_v4i32_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1 ; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3 @@ -405,7 +397,6 @@ ; GFX10-LABEL: load_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 @@ -415,7 +406,6 @@ ; GFX11-LABEL: load_lds_v4i32_align8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -442,7 +432,6 @@ ; GFX10-LABEL: load_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b128 v[0:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -450,7 +439,6 @@ ; GFX11-LABEL: load_lds_v4i32_align16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll @@ -26,7 +26,6 @@ ; GFX10-LABEL: load_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b96 v[0:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -34,7 +33,6 @@ ; GFX11-LABEL: load_lds_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -127,7 +125,6 @@ ; GFX10-LABEL: load_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u8 v1, v0 ; GFX10-NEXT: ds_read_u8 v2, v0 offset:1 ; GFX10-NEXT: ds_read_u8 v3, v0 offset:2 @@ -166,7 +163,6 @@ ; GFX11-LABEL: load_lds_v3i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u8 v1, v0 ; GFX11-NEXT: ds_load_u8 v2, v0 offset:1 ; GFX11-NEXT: ds_load_u8 v3, v0 offset:2 @@ -248,7 +244,6 @@ ; GFX10-LABEL: load_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v1, v0 ; GFX10-NEXT: ds_read_u16 v2, v0 offset:2 ; GFX10-NEXT: ds_read_u16 v3, v0 offset:4 @@ -266,7 +261,6 @@ ; GFX11-LABEL: load_lds_v3i32_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v1, v0 ; GFX11-NEXT: ds_load_u16 v2, v0 offset:2 ; GFX11-NEXT: ds_load_u16 v3, v0 offset:4 @@ -307,7 +301,6 @@ ; GFX10-LABEL: load_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8 @@ -317,7 +310,6 @@ ; GFX11-LABEL: load_lds_v3i32_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 @@ -350,7 +342,6 @@ ; GFX10-LABEL: load_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8 @@ -360,7 +351,6 @@ ; GFX11-LABEL: load_lds_v3i32_align8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 @@ -389,7 +379,6 @@ ; GFX10-LABEL: load_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b96 v[0:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -397,7 +386,6 @@ ; GFX11-LABEL: load_lds_v3i32_align16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -75,7 +75,6 @@ ; GFX10-LABEL: load_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 @@ -85,7 +84,6 @@ ; GFX11-LABEL: load_lds_v4i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -148,7 +146,6 @@ ; GFX10-LABEL: load_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8 @@ -158,7 +155,6 @@ ; GFX11-LABEL: load_lds_v3i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -212,7 +208,6 @@ ; GFX10-LABEL: store_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 ; GFX10-NEXT: ds_write2_b32 v0, v3, v4 offset0:2 offset1:3 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -221,7 +216,6 @@ ; GFX11-LABEL: store_lds_v4i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_store_b128 v0, v[1:4] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -268,7 +262,6 @@ ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset1:1 ; GFX10-NEXT: ds_write_b32 v0, v3 offset:8 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -277,7 +270,6 @@ ; GFX11-LABEL: store_lds_v3i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_store_b96 v0, v[1:3] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -29,7 +29,6 @@ ; GFX10PLUS-LABEL: v_lshr_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10PLUS-NEXT: v_lshrrev_b16 v0, v1, v0 @@ -62,7 +61,6 @@ ; GFX10PLUS-LABEL: v_lshr_i8_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 7, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -113,7 +111,6 @@ ; GFX10PLUS-LABEL: v_lshr_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -132,7 +129,6 @@ ; GFX10PLUS-LABEL: v_lshr_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 7, 17 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i24 %value, 7 @@ -179,7 +175,6 @@ ; GFX10PLUS-LABEL: v_lshr_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i32 %value, %amount @@ -196,7 +191,6 @@ ; GFX10PLUS-LABEL: v_lshr_i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i32 %value, 31 @@ -282,7 +276,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v2, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -301,7 +294,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, 31, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -337,7 +329,6 @@ ; GFX10PLUS-LABEL: v_lshr_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v3, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v4, v1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v5, v2 @@ -377,7 +368,6 @@ ; GFX10PLUS-LABEL: v_lshr_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v4, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v5, v1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v6, v2 @@ -421,7 +411,6 @@ ; GFX10PLUS-LABEL: v_lshr_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v5, v0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v6, v1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v7, v2 @@ -481,7 +470,6 @@ ; GFX10-LABEL: v_lshr_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, v17, v1 @@ -505,7 +493,6 @@ ; GFX11-LABEL: v_lshr_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, v17, v1 @@ -597,7 +584,6 @@ ; GFX10PLUS-LABEL: v_lshr_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i16 %value, %amount @@ -626,7 +612,6 @@ ; GFX10PLUS-LABEL: v_lshr_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 15, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i16 %value, 15 @@ -746,7 +731,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr <2 x i16> %value, %amount @@ -779,7 +763,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr <2 x i16> %value, @@ -959,7 +942,6 @@ ; GFX10PLUS-LABEL: v_lshr_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v2, v0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1131,7 +1113,6 @@ ; GFX10PLUS-LABEL: v_lshr_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v4, v0 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v1, v5, v1 ; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v2, v6, v2 @@ -1289,7 +1270,6 @@ ; GFX10PLUS-LABEL: v_lshr_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i64 %value, %amount @@ -1307,7 +1287,6 @@ ; GFX10PLUS-LABEL: v_lshr_i64_63: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v1 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1326,7 +1305,6 @@ ; GFX10PLUS-LABEL: v_lshr_i64_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 1, v1 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1345,7 +1323,6 @@ ; GFX10-LABEL: v_lshr_i64_32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1353,7 +1330,6 @@ ; GFX11-LABEL: v_lshr_i64_32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = lshr i64 %value, 32 @@ -1382,7 +1358,6 @@ ; GFX10PLUS-LABEL: v_lshr_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = lshr i64 %value, 31 @@ -1540,7 +1515,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1] ; GFX10PLUS-NEXT: v_lshrrev_b64 v[2:3], v6, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1573,7 +1547,6 @@ ; GFX10PLUS-LABEL: v_lshr_v2i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: v_lshrrev_b64 v[2:3], 31, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1667,7 +1640,6 @@ ; GFX10-LABEL: v_lshr_i65: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_and_b32_e32 v4, 1, v2 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3 @@ -1690,7 +1662,6 @@ ; GFX11-LABEL: v_lshr_i65: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3 @@ -1752,7 +1723,6 @@ ; GFX10-LABEL: v_lshr_i65_33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v2 @@ -1765,7 +1735,6 @@ ; GFX11-LABEL: v_lshr_i65_33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 1, v2 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -59,7 +59,6 @@ ; GFX10PLUS-LABEL: v_mul_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = mul i16 %num, %den @@ -123,7 +122,6 @@ ; GFX10PLUS-LABEL: v_mul_i16_zeroext: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -192,7 +190,6 @@ ; GFX10PLUS-LABEL: v_mul_i16_signext: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -224,7 +221,6 @@ ; GFX10PLUS-LABEL: v_mul_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = mul i32 %num, %den @@ -258,7 +254,6 @@ ; GFX10PLUS-LABEL: v_mul_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_lo_u32 v0, v0, v2 ; GFX10PLUS-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -382,7 +377,6 @@ ; GFX10-LABEL: v_mul_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 @@ -394,7 +388,6 @@ ; GFX11-LABEL: v_mul_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 ; GFX11-NEXT: v_mul_lo_u32 v3, v4, v3 @@ -520,7 +513,6 @@ ; GFX10-LABEL: v_mul_i96: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v6, v0 ; GFX10-NEXT: v_mov_b32_e32 v7, v1 ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3 @@ -535,7 +527,6 @@ ; GFX11-LABEL: v_mul_i96: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, v1 ; GFX11-NEXT: v_mul_lo_u32 v2, v2, v3 ; GFX11-NEXT: v_mul_lo_u32 v5, v6, v5 @@ -790,7 +781,6 @@ ; GFX10-LABEL: v_mul_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, v0 ; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 @@ -813,7 +803,6 @@ ; GFX11-LABEL: v_mul_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1 ; GFX11-NEXT: v_mov_b32_e32 v10, v2 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, v4 @@ -1852,7 +1841,6 @@ ; GFX10-LABEL: v_mul_i256: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, v0 ; GFX10-NEXT: v_mov_b32_e32 v17, v1 ; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9 @@ -1924,7 +1912,6 @@ ; GFX11-LABEL: v_mul_i256: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, v8 ; GFX11-NEXT: v_mul_lo_u32 v27, v6, v9 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll @@ -22,7 +22,6 @@ ; GFX10-LABEL: v_mul_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul <2 x i16> %a, %b @@ -48,7 +47,6 @@ ; GFX10-LABEL: v_mul_v2i16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a @@ -76,7 +74,6 @@ ; GFX10-LABEL: v_mul_v2i16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.b = fneg <2 x half> %b @@ -105,7 +102,6 @@ ; GFX10-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg <2 x half> %a diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -106,7 +106,6 @@ ; GFX10PLUS-LABEL: v_orn2_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -257,7 +256,6 @@ ; GFX10PLUS-LABEL: v_orn2_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2 ; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2 @@ -452,7 +450,6 @@ ; GFX10PLUS-LABEL: v_orn2_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -679,7 +676,6 @@ ; GFX10PLUS-LABEL: v_orn2_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -903,7 +899,6 @@ ; GFX10PLUS-LABEL: v_orn2_v3i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -11, v3 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2 @@ -1140,7 +1135,6 @@ ; GFX10PLUS-LABEL: v_orn2_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -48,7 +48,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp @@ -159,7 +158,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp @@ -310,7 +308,6 @@ ; GFX10-LABEL: v_saddsat_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -329,7 +326,6 @@ ; GFX11-LABEL: v_saddsat_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -658,7 +654,6 @@ ; GFX10-LABEL: v_saddsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -690,7 +685,6 @@ ; GFX11-LABEL: v_saddsat_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -1050,7 +1044,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp @@ -1149,7 +1142,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) @@ -1322,7 +1314,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1456,7 +1447,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v5 clamp @@ -1625,7 +1615,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v6 clamp @@ -1831,7 +1820,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v5 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v6 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v7 clamp @@ -2230,7 +2218,6 @@ ; GFX10-LABEL: v_saddsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_add_nc_i32 v0, v0, v16 clamp ; GFX10-NEXT: v_add_nc_i32 v1, v1, v17 clamp @@ -2254,7 +2241,6 @@ ; GFX11-LABEL: v_saddsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_add_nc_i32 v0, v0, v16 clamp ; GFX11-NEXT: v_add_nc_i32 v1, v1, v17 clamp @@ -2639,7 +2625,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) @@ -2834,7 +2819,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -3172,7 +3156,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -3478,7 +3461,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v6i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v2, v2, v5 clamp @@ -3874,7 +3856,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v2, v2, v6 clamp @@ -4200,7 +4181,6 @@ ; GFX10-LABEL: v_saddsat_i48: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 @@ -4218,7 +4198,6 @@ ; GFX11-LABEL: v_saddsat_i48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 @@ -4588,7 +4567,6 @@ ; GFX10-LABEL: v_saddsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3] @@ -4603,7 +4581,6 @@ ; GFX11-LABEL: v_saddsat_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX11-NEXT: v_cmp_gt_i64_e64 s0, 0, v[2:3] @@ -4937,7 +4914,6 @@ ; GFX10-LABEL: v_saddsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v0, v4 ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v2, v6 @@ -4961,7 +4937,6 @@ ; GFX11-LABEL: v_saddsat_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo ; GFX11-NEXT: v_add_co_u32 v10, vcc_lo, v2, v6 @@ -5853,7 +5828,6 @@ ; GFX10-LABEL: v_saddsat_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v0, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo @@ -5905,7 +5879,6 @@ ; GFX11-LABEL: v_saddsat_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v8 ; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo ; GFX11-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll @@ -5,72 +5,44 @@ ; Test vector signed bitfield extract. define signext i8 @v_ashr_i8_i32(i32 %value) { -; GFX89-LABEL: v_ashr_i8_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_i32 v0, v0, 4, 8 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_ashr_i8_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_ashr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = ashr i32 %value, 4 %2 = trunc i32 %1 to i8 ret i8 %2 } define signext i16 @v_ashr_i16_i32(i32 %value) { -; GFX89-LABEL: v_ashr_i16_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_i32 v0, v0, 9, 16 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_ashr_i16_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_ashr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = ashr i32 %value, 9 %2 = trunc i32 %1 to i16 ret i16 %2 } define signext i8 @v_lshr_i8_i32(i32 %value) { -; GFX89-LABEL: v_lshr_i8_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_i32 v0, v0, 4, 8 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_lshr_i8_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_lshr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = lshr i32 %value, 4 %2 = trunc i32 %1 to i8 ret i8 %2 } define signext i16 @v_lshr_i16_i32(i32 %value) { -; GFX89-LABEL: v_lshr_i16_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_i32 v0, v0, 9, 16 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_lshr_i16_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_lshr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = lshr i32 %value, 9 %2 = trunc i32 %1 to i16 ret i16 %2 @@ -78,22 +50,13 @@ ; Test vector bitfield extract for 64-bits. define i64 @v_ashr_i64(i64 %value) { -; GFX89-LABEL: v_ashr_i64: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] -; GFX89-NEXT: v_bfe_i32 v0, v0, 0, 4 -; GFX89-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_ashr_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] -; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4 -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_ashr_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = ashr i64 %value, 10 %2 = shl i64 %1, 60 %3 = ashr i64 %2, 60 @@ -101,22 +64,13 @@ } define i64 @v_lshr_i64(i64 %value) { -; GFX89-LABEL: v_lshr_i64: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] -; GFX89-NEXT: v_bfe_i32 v0, v0, 0, 4 -; GFX89-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_lshr_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] -; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4 -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_lshr_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = lshr i64 %value, 10 %2 = shl i64 %1, 60 %3 = ashr i64 %2, 60 @@ -175,3 +129,6 @@ %3 = ashr i64 %2, 60 ret i64 %3 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10: {{.*}} +; GFX89: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -15,7 +15,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i8_4: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i8 %value, 4 @@ -33,7 +32,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i8_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i8 %value, 7 @@ -114,7 +112,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i24_12: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 12, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -133,7 +130,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 17 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i24 %value, 7 @@ -181,7 +177,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i32_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 29 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %value, 3 @@ -199,7 +194,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %value, 31 @@ -248,7 +242,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -268,7 +261,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -306,7 +298,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16 @@ -348,7 +339,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26 @@ -394,7 +384,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2 @@ -454,7 +443,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 5 @@ -545,7 +533,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i16_4: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i16 %value, 4 @@ -576,7 +563,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i16 %value, 15 @@ -675,7 +661,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -713,7 +698,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -839,7 +823,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1] @@ -1039,7 +1022,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1] @@ -1225,7 +1207,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_23: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 23 @@ -1244,7 +1225,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_40: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1264,7 +1244,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_63: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1284,7 +1263,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1304,7 +1282,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1323,7 +1300,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 31 @@ -1417,7 +1393,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1437,7 +1412,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1511,7 +1485,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i65_22: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] @@ -1567,7 +1540,6 @@ ; GFX10PLUS-LABEL: v_sext_inreg_i65_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -46,7 +46,6 @@ ; GFX10-LABEL: v_shl_i64_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -55,7 +54,6 @@ ; GFX11-LABEL: v_shl_i64_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3fffffff, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -97,7 +95,6 @@ ; GFX10-LABEL: v_shl_i64_sext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -106,7 +103,6 @@ ; GFX11-LABEL: v_shl_i64_sext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x1fffffff, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -164,7 +160,6 @@ ; GFX10-LABEL: v_shl_i64_zext_i32_overflow: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] @@ -173,7 +168,6 @@ ; GFX11-LABEL: v_shl_i64_zext_i32_overflow: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x7fffffff, v0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -231,7 +225,6 @@ ; GFX10PLUS-LABEL: v_shl_i64_sext_i32_overflow: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] @@ -471,7 +464,6 @@ ; GFX10-LABEL: v_shl_v2i64_zext_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 ; GFX10-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 @@ -483,7 +475,6 @@ ; GFX11-LABEL: v_shl_v2i64_zext_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1 ; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1 @@ -538,7 +529,6 @@ ; GFX10-LABEL: v_shl_v2i64_sext_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 ; GFX10-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 @@ -550,7 +540,6 @@ ; GFX11-LABEL: v_shl_v2i64_sext_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1 ; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1 @@ -619,7 +608,6 @@ ; GFX10PLUS-LABEL: v_shl_i32_zext_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 2, v0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -723,7 +711,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i32_zext_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 2, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -28,7 +28,6 @@ ; GFX10PLUS-LABEL: v_shl_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -58,7 +57,6 @@ ; GFX10PLUS-LABEL: v_shl_i8_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 7, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i8 %value, 7 @@ -118,7 +116,6 @@ ; GFX10PLUS-LABEL: v_shl_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -136,7 +133,6 @@ ; GFX10PLUS-LABEL: v_shl_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i24 %value, 7 @@ -181,7 +177,6 @@ ; GFX10PLUS-LABEL: v_shl_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i32 %value, %amount @@ -198,7 +193,6 @@ ; GFX10PLUS-LABEL: v_shl_i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i32 %value, 31 @@ -284,7 +278,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v2, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -303,7 +296,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 31, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -339,7 +331,6 @@ ; GFX10PLUS-LABEL: v_shl_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v3, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v4, v1 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v5, v2 @@ -379,7 +370,6 @@ ; GFX10PLUS-LABEL: v_shl_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v4, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v5, v1 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v6, v2 @@ -423,7 +413,6 @@ ; GFX10PLUS-LABEL: v_shl_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v5, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v6, v1 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v7, v2 @@ -483,7 +472,6 @@ ; GFX10-LABEL: v_shl_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, v16, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, v17, v1 @@ -507,7 +495,6 @@ ; GFX11-LABEL: v_shl_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, v16, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, v17, v1 @@ -598,7 +585,6 @@ ; GFX10PLUS-LABEL: v_shl_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i16 %value, %amount @@ -627,7 +613,6 @@ ; GFX10PLUS-LABEL: v_shl_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 15, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i16 %value, 15 @@ -754,7 +739,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v1, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl <2 x i16> %value, %amount @@ -787,7 +771,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl <2 x i16> %value, @@ -965,7 +948,6 @@ ; GFX10PLUS-LABEL: v_shl_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v2, v0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v3, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1133,7 +1115,6 @@ ; GFX10PLUS-LABEL: v_shl_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v4, v0 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v5, v1 ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, v6, v2 @@ -1283,7 +1264,6 @@ ; GFX10PLUS-LABEL: v_shl_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i64 %value, %amount @@ -1301,7 +1281,6 @@ ; GFX10-LABEL: v_shl_i64_63: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1309,7 +1288,6 @@ ; GFX11-LABEL: v_shl_i64_63: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 31, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = shl i64 %value, 63 @@ -1327,7 +1305,6 @@ ; GFX10-LABEL: v_shl_i64_33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1335,7 +1312,6 @@ ; GFX11-LABEL: v_shl_i64_33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = shl i64 %value, 33 @@ -1353,7 +1329,6 @@ ; GFX10-LABEL: v_shl_i64_32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1361,7 +1336,6 @@ ; GFX11-LABEL: v_shl_i64_32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = shl i64 %value, 32 @@ -1390,7 +1364,6 @@ ; GFX10PLUS-LABEL: v_shl_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = shl i64 %value, 31 @@ -1548,7 +1521,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1581,7 +1553,6 @@ ; GFX10PLUS-LABEL: v_shl_v2i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1663,7 +1634,6 @@ ; GFX10-LABEL: v_shl_i65: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3] ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3 @@ -1682,7 +1652,6 @@ ; GFX11-LABEL: v_shl_i65: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3] ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v3 @@ -1731,7 +1700,6 @@ ; GFX10-LABEL: v_shl_i65_33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, 0 @@ -1741,7 +1709,6 @@ ; GFX11-LABEL: v_shl_i65_33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 1, v0 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll @@ -118,7 +118,6 @@ ; GFX10-LABEL: v_shl1_add_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 1, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %src0, 1 @@ -143,7 +142,6 @@ ; GFX10-LABEL: v_shl2_add_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %src0, 2 @@ -168,7 +166,6 @@ ; GFX10-LABEL: v_shl3_add_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %src0, 3 @@ -193,7 +190,6 @@ ; GFX10-LABEL: v_shl4_add_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 4, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %src0, 4 @@ -218,7 +214,6 @@ ; GFX10-LABEL: v_shl5_add_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 5, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %src0, 5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: test_min_max_ValK0_K1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) @@ -31,7 +30,6 @@ ; GFX10-LABEL: min_max_ValK0_K1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) @@ -49,7 +47,6 @@ ; GFX10-LABEL: test_min_K1max_ValK0__i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) @@ -67,7 +64,6 @@ ; GFX10-LABEL: test_min_K1max_K0Val__i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) @@ -85,7 +81,6 @@ ; GFX10-LABEL: test_max_min_ValK1_K0_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) @@ -103,7 +98,6 @@ ; GFX10-LABEL: test_max_min_K1Val_K0_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) @@ -121,7 +115,6 @@ ; GFX10-LABEL: test_max_K0min_ValK1__i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) @@ -139,7 +132,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val__i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) @@ -170,7 +162,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val__v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -207,7 +198,6 @@ ; GFX10-LABEL: test_non_inline_constant_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_i32 v0, v0, -12, 0x41 ; GFX10-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -48,7 +48,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp @@ -159,7 +158,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp @@ -310,7 +308,6 @@ ; GFX10-LABEL: v_ssubsat_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -329,7 +326,6 @@ ; GFX11-LABEL: v_ssubsat_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -658,7 +654,6 @@ ; GFX10-LABEL: v_ssubsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -690,7 +685,6 @@ ; GFX11-LABEL: v_ssubsat_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -1050,7 +1044,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp @@ -1149,7 +1142,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) @@ -1322,7 +1314,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1456,7 +1447,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp @@ -1625,7 +1615,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp @@ -1831,7 +1820,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v6 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v7 clamp @@ -2230,7 +2218,6 @@ ; GFX10-LABEL: v_ssubsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp ; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp @@ -2254,7 +2241,6 @@ ; GFX11-LABEL: v_ssubsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp ; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp @@ -2639,7 +2625,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) @@ -2834,7 +2819,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -3172,7 +3156,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -3478,7 +3461,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v6i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v2, v2, v5 clamp @@ -3874,7 +3856,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v2, v2, v6 clamp @@ -4200,7 +4181,6 @@ ; GFX10-LABEL: v_ssubsat_i48: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 @@ -4218,7 +4198,6 @@ ; GFX11-LABEL: v_ssubsat_i48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 @@ -4588,7 +4567,6 @@ ; GFX10-LABEL: v_ssubsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3] @@ -4603,7 +4581,6 @@ ; GFX11-LABEL: v_ssubsat_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3] @@ -4937,7 +4914,6 @@ ; GFX10-LABEL: v_ssubsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v0, v4 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v2, v6 @@ -4961,7 +4937,6 @@ ; GFX11-LABEL: v_ssubsat_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v8, vcc_lo, v0, v4 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo ; GFX11-NEXT: v_sub_co_u32 v10, vcc_lo, v2, v6 @@ -5894,7 +5869,6 @@ ; GFX10-LABEL: v_ssubsat_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v16, vcc_lo, v0, v8 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v1, v9, vcc_lo ; GFX10-NEXT: v_sub_co_ci_u32_e32 v18, vcc_lo, v2, v10, vcc_lo @@ -5950,7 +5924,6 @@ ; GFX11-LABEL: v_ssubsat_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v16, vcc_lo, v0, v8 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v1, v9, vcc_lo ; GFX11-NEXT: v_sub_co_ci_u32_e32 v18, vcc_lo, v2, v10, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -38,7 +38,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp @@ -124,7 +123,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp @@ -235,7 +233,6 @@ ; GFX10-LABEL: v_uaddsat_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -254,7 +251,6 @@ ; GFX11-LABEL: v_uaddsat_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -498,7 +494,6 @@ ; GFX10-LABEL: v_uaddsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -530,7 +525,6 @@ ; GFX11-LABEL: v_uaddsat_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -811,7 +805,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp @@ -888,7 +881,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) @@ -1009,7 +1001,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1093,7 +1084,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v5 clamp @@ -1194,7 +1184,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v6 clamp @@ -1312,7 +1301,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v5 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v6 clamp ; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v7 clamp @@ -1503,7 +1491,6 @@ ; GFX10-LABEL: v_uaddsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp @@ -1527,7 +1514,6 @@ ; GFX11-LABEL: v_uaddsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp ; GFX11-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp @@ -1774,7 +1760,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) @@ -1910,7 +1895,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -2108,7 +2092,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -2272,7 +2255,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v6i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_pk_add_u16 v2, v2, v5 clamp @@ -2470,7 +2452,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_pk_add_u16 v2, v2, v6 clamp @@ -2659,7 +2640,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i48: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX10PLUS-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -2874,7 +2854,6 @@ ; GFX10PLUS-LABEL: v_uaddsat_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10PLUS-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo @@ -3039,7 +3018,6 @@ ; GFX10-LABEL: v_uaddsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX10-NEXT: v_add_co_u32 v2, s4, v2, v6 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo @@ -3053,7 +3031,6 @@ ; GFX11-LABEL: v_uaddsat_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_u32 v2, s0, v2, v6 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo @@ -3346,7 +3323,6 @@ ; GFX10-LABEL: v_uaddsat_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v8 ; GFX10-NEXT: v_add_co_u32 v4, s4, v4, v12 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo @@ -3368,7 +3344,6 @@ ; GFX11-LABEL: v_uaddsat_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v8 ; GFX11-NEXT: v_add_co_u32 v4, s0, v4, v12 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll @@ -5,18 +5,11 @@ ; Test vector bitfield extract. define i32 @v_srl_mask_i32(i32 %value) { -; GFX89-LABEL: v_srl_mask_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_srl_mask_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_srl_mask_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 8, 5 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = lshr i32 %value, 8 %2 = and i32 %1, 31 ret i32 %2 @@ -46,18 +39,11 @@ ; Test vector bitfield extract. define i32 @v_mask_srl_i32(i32 %value) { -; GFX89-LABEL: v_mask_srl_i32: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_mask_srl_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_mask_srl_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 8, 5 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = and i32 %value, 7936 ; 31 << 8 %2 = lshr i32 %1, 8 ret i32 %2 @@ -76,22 +62,13 @@ ; Test vector bitfield extract for 64-bits. define i64 @v_srl_mask_i64(i64 %value) { -; GFX89-LABEL: v_srl_mask_i64: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] -; GFX89-NEXT: v_mov_b32_e32 v1, 0 -; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_srl_mask_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_srl_mask_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, 10 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = lshr i64 %value, 25 %2 = and i64 %1, 1023 ret i64 %2 @@ -124,22 +101,13 @@ ; TODO: No need for a 64-bit shift instruction when the extracted value is ; entirely contained within the upper or lower half. define i64 @v_mask_srl_i64(i64 %value) { -; GFX89-LABEL: v_mask_srl_i64: -; GFX89: ; %bb.0: -; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] -; GFX89-NEXT: v_mov_b32_e32 v1, 0 -; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10 -; GFX89-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_mask_srl_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_mask_srl_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, 10 +; GCN-NEXT: s_setpc_b64 s[30:31] %1 = and i64 %value, 34326183936 ; 1023 << 25 %2 = lshr i64 %1, 25 ret i64 %2 @@ -155,3 +123,6 @@ %2 = lshr i64 %1, 25 ret i64 %2 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10: {{.*}} +; GFX89: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: test_min_max_ValK0_K1_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) @@ -31,7 +30,6 @@ ; GFX10-LABEL: min_max_ValK0_K1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) @@ -49,7 +47,6 @@ ; GFX10-LABEL: test_min_K1max_ValK0__u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) @@ -67,7 +64,6 @@ ; GFX10-LABEL: test_min_K1max_K0Val__u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) @@ -85,7 +81,6 @@ ; GFX10-LABEL: test_max_min_ValK1_K0_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) @@ -103,7 +98,6 @@ ; GFX10-LABEL: test_max_min_K1Val_K0_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) @@ -121,7 +115,6 @@ ; GFX10-LABEL: test_max_K0min_ValK1__u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) @@ -139,7 +132,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val__u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) @@ -170,7 +162,6 @@ ; GFX10-LABEL: test_max_K0min_K1Val__v2u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -207,7 +198,6 @@ ; GFX10-LABEL: test_non_inline_constant_u32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_med3_u32 v0, v0, 12, 0x41 ; GFX10-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -37,7 +37,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp @@ -121,7 +120,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp @@ -229,7 +227,6 @@ ; GFX10-LABEL: v_usubsat_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -248,7 +245,6 @@ ; GFX11-LABEL: v_usubsat_v2i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -486,7 +482,6 @@ ; GFX10-LABEL: v_usubsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -518,7 +513,6 @@ ; GFX11-LABEL: v_usubsat_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0 @@ -794,7 +788,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i24: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp @@ -869,7 +862,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) @@ -985,7 +977,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1064,7 +1055,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v5 clamp @@ -1158,7 +1148,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v6 clamp @@ -1267,7 +1256,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v5i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v6 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v7 clamp @@ -1437,7 +1425,6 @@ ; GFX10-LABEL: v_usubsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp ; GFX10-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp @@ -1461,7 +1448,6 @@ ; GFX11-LABEL: v_usubsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp ; GFX11-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp @@ -1691,7 +1677,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) @@ -1822,7 +1807,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -2010,7 +1994,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -2164,7 +2147,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v6i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v2, v2, v5 clamp @@ -2348,7 +2330,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v8i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v2, v2, v6 clamp @@ -2529,7 +2510,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i48: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3] ; GFX10PLUS-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 @@ -2742,7 +2722,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 ; GFX10PLUS-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc_lo @@ -2907,7 +2886,6 @@ ; GFX10-LABEL: v_usubsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4 ; GFX10-NEXT: v_sub_co_u32 v2, s4, v2, v6 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo @@ -2921,7 +2899,6 @@ ; GFX11-LABEL: v_usubsat_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_sub_co_u32 v2, s0, v2, v6 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo @@ -3214,7 +3191,6 @@ ; GFX10-LABEL: v_usubsat_v2i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v8 ; GFX10-NEXT: v_sub_co_u32 v4, s4, v4, v12 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo @@ -3236,7 +3212,6 @@ ; GFX11-LABEL: v_usubsat_v2i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v8 ; GFX11-NEXT: v_sub_co_u32 v4, s0, v4, v12 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll @@ -16,7 +16,6 @@ ; GFX10PLUS-LABEL: check_v_bfe: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -222,7 +222,6 @@ ; GFX10-LABEL: vector_xnor_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_not_b32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -245,7 +244,6 @@ ; GFX10-LABEL: vector_xnor_i64_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 ; GFX10-NEXT: v_not_b32_e32 v0, v0 @@ -429,7 +427,6 @@ ; GFX10-LABEL: vector_xor_na_b_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: @@ -469,7 +466,6 @@ ; GFX10-LABEL: vector_xor_a_nb_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -36,7 +36,6 @@ ; GFX10-LABEL: zextload_global_i1_to_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 @@ -45,7 +44,6 @@ ; GFX11-LABEL: zextload_global_i1_to_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 @@ -83,7 +81,6 @@ ; GFX10-LABEL: zextload_global_i8_to_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -91,7 +88,6 @@ ; GFX11-LABEL: zextload_global_i8_to_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -128,7 +124,6 @@ ; GFX10-LABEL: zextload_global_i16_to_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -136,7 +131,6 @@ ; GFX11-LABEL: zextload_global_i16_to_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -179,7 +173,6 @@ ; GFX10-LABEL: zextload_global_i1_to_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -189,7 +182,6 @@ ; GFX11-LABEL: zextload_global_i1_to_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v0 @@ -230,7 +222,6 @@ ; GFX10-LABEL: zextload_global_i8_to_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -239,7 +230,6 @@ ; GFX11-LABEL: zextload_global_i8_to_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -280,7 +270,6 @@ ; GFX10-LABEL: zextload_global_i16_to_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -289,7 +278,6 @@ ; GFX11-LABEL: zextload_global_i16_to_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -330,7 +318,6 @@ ; GFX10-LABEL: zextload_global_i32_to_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -339,7 +326,6 @@ ; GFX11-LABEL: zextload_global_i32_to_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -383,7 +369,6 @@ ; GFX10-LABEL: zextload_global_i32_to_i96: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 @@ -393,7 +378,6 @@ ; GFX11-LABEL: zextload_global_i32_to_i96: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -440,7 +424,6 @@ ; GFX10-LABEL: zextload_global_i32_to_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 @@ -451,7 +434,6 @@ ; GFX11-LABEL: zextload_global_i32_to_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll --- a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll +++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll @@ -28,13 +28,11 @@ ; GFX10-LABEL: test1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: test1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm tail call void @llvm.amdgcn.endpgm() unreachable diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -66,7 +66,6 @@ ; GFX1100-LABEL: syncscope_system: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: flat_load_b32 v3, v[0:1] ; GFX1100-NEXT: s_mov_b32 s0, 0 ; GFX1100-NEXT: .LBB0_1: ; %atomicrmw.start @@ -366,7 +365,6 @@ ; GFX1100-LABEL: no_unsafe: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: flat_load_b32 v3, v[0:1] ; GFX1100-NEXT: s_mov_b32 s0, 0 ; GFX1100-NEXT: .LBB3_1: ; %atomicrmw.start diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll --- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll @@ -33,25 +33,21 @@ ; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence: ; GFX10-BACKOFF: ; %bb.0: ; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] ; GFX10-BACKOFF-NEXT: s_barrier ; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 ; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence: ; GFX11-BACKOFF: ; %bb.0: ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1] ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-BACKOFF-NEXT: s_barrier ; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0 ; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31] %load = load i32, ptr %in call void @llvm.amdgcn.s.barrier() @@ -85,7 +81,6 @@ ; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence: ; GFX10-BACKOFF: ; %bb.0: ; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] ; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 @@ -95,13 +90,11 @@ ; GFX10-BACKOFF-NEXT: buffer_gl0_inv ; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 ; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence: ; GFX11-BACKOFF: ; %bb.0: ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1] ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 @@ -111,7 +104,6 @@ ; GFX11-BACKOFF-NEXT: buffer_gl0_inv ; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0 ; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31] %load = load i32, ptr %in fence syncscope("workgroup") release diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -57,11 +57,9 @@ ; GFX10-LABEL: test_load_store: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out @@ -119,11 +117,9 @@ ; GFX10-LABEL: test_load_store_f32_to_bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short_d16_hi v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load float, ptr addrspace(1) %in %val.bf16 = fptrunc float %val to bfloat @@ -186,12 +182,10 @@ ; GFX10-LABEL: test_load_store_f64_to_bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX10-NEXT: global_store_short_d16_hi v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load double, ptr addrspace(1) %in %val.bf16 = fptrunc double %val to bfloat @@ -251,12 +245,10 @@ ; GFX10-LABEL: test_load_store_bf16_to_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: global_load_short_d16_hi v4, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v[2:3], v4, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in %val.f32 = fpext bfloat %val to float @@ -320,13 +312,11 @@ ; GFX10-LABEL: test_load_store_bf16_to_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: global_load_short_d16_hi v4, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v4 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in %val.f64 = fpext bfloat %val to double @@ -382,11 +372,9 @@ ; GFX10-LABEL: test_load_store_v2bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load <2 x bfloat>, ptr addrspace(1) %in store <2 x bfloat> %val, ptr addrspace(1) %out @@ -441,11 +429,9 @@ ; GFX10-LABEL: test_load_store_v4bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load <4 x bfloat>, ptr addrspace(1) %in store <4 x bfloat> %val, ptr addrspace(1) %out @@ -500,11 +486,9 @@ ; GFX10-LABEL: test_load_store_v8bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load <8 x bfloat>, ptr addrspace(1) %in store <8 x bfloat> %val, ptr addrspace(1) %out @@ -575,7 +559,6 @@ ; GFX10-LABEL: test_load_store_v16bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off @@ -583,7 +566,6 @@ ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load <16 x bfloat>, ptr addrspace(1) %in store <16 x bfloat> %val, ptr addrspace(1) %out @@ -633,9 +615,7 @@ ; GFX10-LABEL: test_arg_store: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_short_d16_hi v[1:2], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store bfloat %in, ptr addrspace(1) %out ret void @@ -685,9 +665,7 @@ ; GFX10-LABEL: test_arg_store_v2bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dword v[1:2], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store <2 x bfloat> %in, ptr addrspace(1) %out ret void @@ -745,10 +723,8 @@ ; GFX10-LABEL: test_arg_store_v3bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_short v[2:3], v1, off offset:4 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store <3 x bfloat> %in, <3 x bfloat> addrspace(1) * %out ret void @@ -802,9 +778,7 @@ ; GFX10-LABEL: test_arg_store_v4bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store <4 x bfloat> %in, ptr addrspace(1) %out ret void @@ -866,9 +840,7 @@ ; GFX10-LABEL: test_arg_store_v8bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store <8 x bfloat> %in, ptr addrspace(1) %out ret void @@ -953,10 +925,8 @@ ; GFX10-LABEL: test_arg_store_v16bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dwordx4 v[8:9], v[4:7], off offset:16 ; GFX10-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store <16 x bfloat> %in, ptr addrspace(1) %out ret void @@ -1009,10 +979,8 @@ ; GFX10-LABEL: test_inreg_arg_store: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-NEXT: global_store_short_d16_hi v[0:1], v2, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store bfloat %in, ptr addrspace(1) %out ret void @@ -1053,9 +1021,7 @@ ; GFX10-LABEL: test_byval: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store bfloat %val, ptr addrspace(5) %bv %retval = load bfloat, ptr addrspace(5) %bv @@ -1097,9 +1063,7 @@ ; GFX10-LABEL: test_sret: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] store bfloat %val, ptr addrspace(5) %sret ret void @@ -1153,11 +1117,9 @@ ; GFX10-LABEL: test_bitcast_from_bfloat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 @@ -1213,11 +1175,9 @@ ; GFX10-LABEL: test_bitcast_to_bfloat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v2, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short v[0:1], v2, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = load i16, ptr addrspace(1) %in %val_fp = bitcast i16 %val to bfloat @@ -1249,7 +1209,6 @@ ; GFX10-LABEL: test_ret: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: ret bfloat %in @@ -1279,7 +1238,6 @@ ; GFX10-LABEL: test_ret_v2bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: ret <2 x bfloat> %in @@ -1314,7 +1272,6 @@ ; GFX10-LABEL: test_ret_v3bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v2 @@ -1347,7 +1304,6 @@ ; GFX10-LABEL: test_ret_v4bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: ret <4 x bfloat> %in @@ -1377,7 +1333,6 @@ ; GFX10-LABEL: test_ret_v8bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: ret <8 x bfloat> %in @@ -1407,7 +1362,6 @@ ; GFX10-LABEL: test_ret_v16bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: ret <16 x bfloat> %in @@ -1537,7 +1491,6 @@ ; GFX10-LABEL: test_call: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -1702,7 +1655,6 @@ ; GFX10-LABEL: test_call_v2bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -1879,7 +1831,6 @@ ; GFX10-LABEL: test_call_v3bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -2082,7 +2033,6 @@ ; GFX10-LABEL: test_call_v4bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -2340,7 +2290,6 @@ ; GFX10-LABEL: test_call_v8bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -2714,7 +2663,6 @@ ; GFX10-LABEL: test_call_v16bf16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 @@ -2825,7 +2773,6 @@ ; GFX10-LABEL: test_alloca_load_store_ret: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3140,7 +3087,6 @@ ; GFX10-LABEL: test_overflow_stack: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 @@ -3181,7 +3127,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:116 ; GFX10-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen offset:128 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1 diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll --- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll +++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll @@ -105,7 +105,6 @@ ; GFX10-LABEL: v_bfi_def_i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -118,7 +117,6 @@ ; GFX10-GISEL-LABEL: v_bfi_def_i32: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] entry: @@ -227,7 +225,6 @@ ; GFX10-LABEL: v_bfi_sha256_ch: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -240,7 +237,6 @@ ; GFX10-GISEL-LABEL: v_bfi_sha256_ch: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] entry: @@ -563,7 +559,6 @@ ; GFX10-LABEL: v_bfi_sha256_ma: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -578,7 +573,6 @@ ; GFX10-GISEL-LABEL: v_bfi_sha256_ma: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -608,7 +602,6 @@ ; GFX10-LABEL: v_bitselect_v2i32_pat1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -623,7 +616,6 @@ ; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -651,7 +643,6 @@ ; GFX10-LABEL: v_bitselect_i64_pat_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v4 ; GFX10-NEXT: v_bfi_b32 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -666,7 +657,6 @@ ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4 ; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -955,7 +945,6 @@ ; GFX10-LABEL: v_bitselect_i64_pat_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -970,7 +959,6 @@ ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -1131,7 +1119,6 @@ ; GFX10-LABEL: v_bitselect_i64_pat_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1146,7 +1133,6 @@ ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 ; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -1178,7 +1164,6 @@ ; GFX10-LABEL: v_bfi_sha256_ma_i64: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 ; GFX10-NEXT: v_bfi_b32 v0, v0, v4, v2 @@ -1197,7 +1182,6 @@ ; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll --- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll +++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll @@ -878,7 +878,6 @@ ; GFX11-FLAT-LABEL: missing_truncate_promote_bitreverse: ; GFX11-FLAT: ; %bb.0: ; %bb ; GFX11-FLAT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLAT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v0, v0 ; GFX11-FLAT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FLAT-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -888,7 +887,6 @@ ; GFX11-GISEL-LABEL: missing_truncate_promote_bitreverse: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll --- a/llvm/test/CodeGen/AMDGPU/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/bswap.ll @@ -500,7 +500,6 @@ ; GFX11-LABEL: missing_truncate_promote_bswap: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -534,7 +533,6 @@ ; GFX11-LABEL: v_bswap_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) @@ -562,7 +560,6 @@ ; GFX11-LABEL: v_bswap_i16_zext_to_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) @@ -592,7 +589,6 @@ ; GFX11-LABEL: v_bswap_i16_sext_to_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -629,7 +625,6 @@ ; GFX11-LABEL: v_bswap_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src) @@ -668,7 +663,6 @@ ; GFX11-LABEL: v_bswap_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001 ; GFX11-NEXT: v_perm_b32 v1, 0, v1, 0x2030001 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -714,7 +708,6 @@ ; GFX11-LABEL: v_bswap_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001 ; GFX11-NEXT: v_perm_b32 v1, 0, v1, 0x2030001 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -750,7 +743,6 @@ ; GFX11-LABEL: v_bswap_i48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v2, 0, v0, 0x10203 ; GFX11-NEXT: v_perm_b32 v0, 0, v1, 0x10203 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) diff --git a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll --- a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll @@ -7,7 +7,6 @@ ; ISA-LABEL: f: ; ISA: ; %bb.0: ; %bb ; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ISA-NEXT: s_waitcnt_vscnt null, 0x0 ; ISA-NEXT: s_mov_b64 s[4:5], 0 ; ISA-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v0 ; ISA-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 @@ -41,7 +40,6 @@ ; ISA-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; ISA-NEXT: flat_store_dword v[1:2], v6 ; ISA-NEXT: s_waitcnt lgkmcnt(0) -; ISA-NEXT: s_waitcnt_vscnt null, 0x0 ; ISA-NEXT: s_setpc_b64 s[30:31] ; MIR-LABEL: name: f ; MIR: bb.0.bb: diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -5692,7 +5692,6 @@ ; GFX11-LABEL: tail_call_byval_align16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 @@ -5781,7 +5780,6 @@ ; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4 @@ -6029,7 +6027,6 @@ ; GFX11-LABEL: stack_12xv3i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6381,7 +6378,6 @@ ; GFX11-LABEL: stack_12xv3f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6761,7 +6757,6 @@ ; GFX11-LABEL: stack_8xv5i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7146,7 +7141,6 @@ ; GFX11-LABEL: stack_8xv5f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll --- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -91,35 +91,21 @@ } define fastcc float @fastcc(float %arg0) #0 { -; SIVI-LABEL: fastcc: -; SIVI: ; %bb.0: -; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SIVI-NEXT: v_add_f32_e32 v0, 4.0, v0 -; SIVI-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: fastcc: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_add_f32_e32 v0, 4.0, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: fastcc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_add_f32_e32 v0, 4.0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %add = fadd float %arg0, 4.0 ret float %add } define coldcc float @coldcc(float %arg0) #0 { -; SIVI-LABEL: coldcc: -; SIVI: ; %bb.0: -; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SIVI-NEXT: v_add_f32_e32 v0, 4.0, v0 -; SIVI-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: coldcc: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_add_f32_e32 v0, 4.0, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: coldcc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_add_f32_e32 v0, 4.0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %add = fadd float %arg0, 4.0 ret float %add } @@ -955,3 +941,5 @@ } attributes #0 = { nounwind noinline } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; SIVI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -131,7 +131,6 @@ ; GFX10-LABEL: test_sinkable_flat_small_offset_i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo @@ -145,7 +144,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i64 999999 @@ -290,7 +288,6 @@ ; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo @@ -304,7 +301,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i64 999999 @@ -405,7 +401,6 @@ ; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo @@ -419,7 +414,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i64 999999 @@ -576,7 +570,6 @@ ; GFX10-LABEL: test_sink_flat_small_max_flat_offset: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 @@ -593,7 +586,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v4 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i32 1024 @@ -701,7 +693,6 @@ ; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 @@ -718,7 +709,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:636 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i64 99999 @@ -826,7 +816,6 @@ ; GFX10-LABEL: test_sinkable_flat_reg_offset: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v6, -1, 0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, 0 @@ -843,7 +832,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %out.gep = getelementptr i32, ptr %out, i32 1024 diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -29,7 +29,6 @@ ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private: ; GFX10_DEFAULT: ; %bb.0: ; %bb ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_DEFAULT-NEXT: s_clause 0x1 ; GFX10_DEFAULT-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2 ; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], 0 @@ -39,7 +38,6 @@ ; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private: ; FLATSCR_GFX10: ; %bb.0: ; %bb ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 2 ; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0 ; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -51,7 +49,6 @@ ; GFX11-LABEL: chain_hi_to_lo_private: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, 2 ; GFX11-NEXT: scratch_load_u16 v0, off, s0 ; GFX11-NEXT: s_mov_b32 s0, 0 @@ -91,7 +88,6 @@ ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_different_bases: ; GFX10_DEFAULT: ; %bb.0: ; %bb ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_DEFAULT-NEXT: s_clause 0x1 ; GFX10_DEFAULT-NEXT: buffer_load_ushort v0, v0, s[0:3], 0 offen ; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, v1, s[0:3], 0 offen @@ -101,7 +97,6 @@ ; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private_different_bases: ; FLATSCR_GFX10: ; %bb.0: ; %bb ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, v0, off ; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v0, v1, off ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) @@ -110,7 +105,6 @@ ; GFX11-LABEL: chain_hi_to_lo_private_different_bases: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_u16 v0, v0, off ; GFX11-NEXT: scratch_load_d16_hi_b16 v0, v1, off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -147,7 +141,6 @@ ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_arithmatic: ; GFX10_DEFAULT: ; %bb.0: ; %bb ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_DEFAULT-NEXT: v_add_f16_e32 v1, 1.0, v1 ; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) @@ -157,7 +150,6 @@ ; FLATSCR_GFX10-LABEL: chain_hi_to_lo_arithmatic: ; FLATSCR_GFX10: ; %bb.0: ; %bb ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1 ; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) @@ -167,7 +159,6 @@ ; GFX11-LABEL: chain_hi_to_lo_arithmatic: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 ; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -197,7 +188,6 @@ ; GFX10-LABEL: chain_hi_to_lo_group: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: ds_read_u16 v0, v1 offset:2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -208,7 +198,6 @@ ; GFX11-LABEL: chain_hi_to_lo_group: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: ds_load_u16 v0, v1 offset:2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -239,7 +228,6 @@ ; GFX10-LABEL: chain_hi_to_lo_group_different_bases: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v0, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_read_u16_d16_hi v0, v1 @@ -249,7 +237,6 @@ ; GFX11-LABEL: chain_hi_to_lo_group_different_bases: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v0, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_load_u16_d16_hi v0, v1 @@ -281,7 +268,6 @@ ; GFX10-LABEL: chain_hi_to_lo_global: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 2 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off @@ -294,7 +280,6 @@ ; GFX11-LABEL: chain_hi_to_lo_global: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 2 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off @@ -327,7 +312,6 @@ ; GFX10-LABEL: chain_hi_to_lo_global_different_bases: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: global_load_short_d16_hi v0, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -336,7 +320,6 @@ ; GFX11-LABEL: chain_hi_to_lo_global_different_bases: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -368,7 +351,6 @@ ; GFX10-LABEL: chain_hi_to_lo_flat: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 2 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: flat_load_ushort v0, v[0:1] @@ -382,7 +364,6 @@ ; GFX11-LABEL: chain_hi_to_lo_flat: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 2 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: flat_load_u16 v0, v[0:1] @@ -416,7 +397,6 @@ ; GFX10-LABEL: chain_hi_to_lo_flat_different_bases: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_load_ushort v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_load_short_d16_hi v0, v[2:3] @@ -426,7 +406,6 @@ ; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u16 v0, v[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] @@ -626,7 +605,6 @@ ; GFX10-LABEL: chain_hi_to_lo_group_other_dep: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16_d16_hi v1, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0] @@ -638,7 +616,6 @@ ; GFX11-LABEL: chain_hi_to_lo_group_other_dep: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16_d16_hi v1, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0] @@ -683,7 +660,6 @@ ; GFX10-LABEL: chain_hi_to_lo_group_other_dep_multi_chain: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v1, v0 offset:2 ; GFX10-NEXT: ds_read_u16_d16_hi v0, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -694,7 +670,6 @@ ; GFX11-LABEL: chain_hi_to_lo_group_other_dep_multi_chain: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v1, v0 offset:2 ; GFX11-NEXT: ds_load_u16_d16_hi v0, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -738,7 +713,6 @@ ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_other_dep: ; GFX10_DEFAULT: ; %bb.0: ; %bb ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen ; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) ; GFX10_DEFAULT-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0] @@ -750,7 +724,6 @@ ; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private_other_dep: ; FLATSCR_GFX10: ; %bb.0: ; %bb ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off ; FLATSCR_GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) @@ -762,7 +735,6 @@ ; GFX11-LABEL: chain_hi_to_lo_private_other_dep: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off ; GFX11-NEXT: v_add_nc_u32_e32 v2, 2, v0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -808,7 +780,6 @@ ; GFX10-LABEL: chain_hi_to_lo_global_other_dep: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v2, v[0:1], off offset:2 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_short_d16_hi v0, v[0:1], off glc dlc @@ -820,7 +791,6 @@ ; GFX11-LABEL: chain_hi_to_lo_global_other_dep: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u16 v2, v[0:1], off offset:2 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_d16_hi_b16 v0, v[0:1], off glc dlc @@ -869,7 +839,6 @@ ; GFX10-LABEL: chain_hi_to_lo_flat_other_dep: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ushort v2, v[2:3] glc dlc @@ -883,7 +852,6 @@ ; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc @@ -930,7 +898,6 @@ ; GFX10-LABEL: chain_hi_to_lo_group_may_alias_store: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b ; GFX10-NEXT: ds_read_u16 v3, v0 ; GFX10-NEXT: ds_write_b16 v1, v2 @@ -942,7 +909,6 @@ ; GFX11-LABEL: chain_hi_to_lo_group_may_alias_store: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x7b ; GFX11-NEXT: ds_load_u16 v3, v0 ; GFX11-NEXT: ds_store_b16 v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll --- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -1575,7 +1575,6 @@ ; GFX11-LABEL: v_clamp_cvt_pkrtz_src_v2f16_denorm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, v1 clamp ; GFX11-NEXT: s_setpc_b64 s[30:31] %add = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b) diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll --- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll @@ -8,7 +8,6 @@ ; GFX1010-LABEL: combine_add_zext_xor: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0 ; GFX1010-NEXT: s_branch .LBB0_2 ; GFX1010-NEXT: .LBB0_1: ; %bb9 @@ -35,7 +34,6 @@ ; GFX1100-LABEL: combine_add_zext_xor: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_mov_b32_e32 v1, 0 ; GFX1100-NEXT: s_branch .LBB0_2 ; GFX1100-NEXT: .LBB0_1: ; %bb9 @@ -90,7 +88,6 @@ ; GFX1010-LABEL: combine_sub_zext_xor: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0 ; GFX1010-NEXT: s_branch .LBB1_2 ; GFX1010-NEXT: .LBB1_1: ; %bb9 @@ -117,7 +114,6 @@ ; GFX1100-LABEL: combine_sub_zext_xor: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_mov_b32_e32 v1, 0 ; GFX1100-NEXT: s_branch .LBB1_2 ; GFX1100-NEXT: .LBB1_1: ; %bb9 @@ -172,7 +168,6 @@ ; GFX1010-LABEL: combine_add_zext_or: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: s_mov_b32 s4, 0 ; GFX1010-NEXT: s_branch .LBB2_2 ; GFX1010-NEXT: .LBB2_1: ; %bb9 @@ -201,7 +196,6 @@ ; GFX1100-LABEL: combine_add_zext_or: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: s_mov_b32 s0, 0 ; GFX1100-NEXT: s_branch .LBB2_2 ; GFX1100-NEXT: .LBB2_1: ; %bb9 @@ -258,7 +252,6 @@ ; GFX1010-LABEL: combine_sub_zext_or: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: s_mov_b32 s4, 0 ; GFX1010-NEXT: s_branch .LBB3_2 ; GFX1010-NEXT: .LBB3_1: ; %bb9 @@ -287,7 +280,6 @@ ; GFX1100-LABEL: combine_sub_zext_or: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: s_mov_b32 s0, 0 ; GFX1100-NEXT: s_branch .LBB3_2 ; GFX1100-NEXT: .LBB3_1: ; %bb9 @@ -344,7 +336,6 @@ ; GFX1010-LABEL: combine_add_zext_and: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0 ; GFX1010-NEXT: s_branch .LBB4_2 ; GFX1010-NEXT: .LBB4_1: ; %bb9 @@ -370,7 +361,6 @@ ; GFX1100-LABEL: combine_add_zext_and: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_mov_b32_e32 v1, 0 ; GFX1100-NEXT: s_branch .LBB4_2 ; GFX1100-NEXT: .LBB4_1: ; %bb9 @@ -425,7 +415,6 @@ ; GFX1010-LABEL: combine_sub_zext_and: ; GFX1010: ; %bb.0: ; %.entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0 ; GFX1010-NEXT: s_branch .LBB5_2 ; GFX1010-NEXT: .LBB5_1: ; %bb9 @@ -451,7 +440,6 @@ ; GFX1100-LABEL: combine_sub_zext_and: ; GFX1100: ; %bb.0: ; %.entry ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_mov_b32_e32 v1, 0 ; GFX1100-NEXT: s_branch .LBB5_2 ; GFX1100-NEXT: .LBB5_1: ; %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/cse-convergent.ll b/llvm/test/CodeGen/AMDGPU/cse-convergent.ll --- a/llvm/test/CodeGen/AMDGPU/cse-convergent.ll +++ b/llvm/test/CodeGen/AMDGPU/cse-convergent.ll @@ -5,7 +5,6 @@ ; GCN-LABEL: test: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_xor_saveexec_b32 s4, -1 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -47,7 +46,6 @@ ; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GCN-NEXT: s_mov_b32 exec_lo, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: ; %x = subgroup operation over all lanes. diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -18,7 +18,6 @@ ; GFX10-LABEL: v_uitofp_i32_to_f32_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -31,7 +30,6 @@ ; GFX11-LABEL: v_uitofp_i32_to_f32_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %masked = and i32 %arg0, 255 @@ -49,7 +47,6 @@ ; GFX10-LABEL: v_sitofp_i32_to_f32_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -62,7 +59,6 @@ ; GFX11-LABEL: v_sitofp_i32_to_f32_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %masked = and i32 %arg0, 255 @@ -81,7 +77,6 @@ ; GFX10-LABEL: v_uitofp_to_f32_lshr7_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 7, v0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -96,7 +91,6 @@ ; GFX11-LABEL: v_uitofp_to_f32_lshr7_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 7, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 @@ -117,7 +111,6 @@ ; GFX10-LABEL: v_uitofp_to_f32_lshr8_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -130,7 +123,6 @@ ; GFX11-LABEL: v_uitofp_to_f32_lshr8_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 @@ -165,11 +157,9 @@ ; GFX10-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX10-NEXT: global_store_dword v[0:1], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255: @@ -184,11 +174,9 @@ ; GFX11-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX11-NEXT: global_store_b32 v[0:1], v1, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 store i32 %lshr.8, ptr addrspace(1) undef @@ -207,7 +195,6 @@ ; GFX10-LABEL: v_uitofp_to_f32_lshr16_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -220,7 +207,6 @@ ; GFX11-LABEL: v_uitofp_to_f32_lshr16_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 16 @@ -239,7 +225,6 @@ ; GFX10-LABEL: v_uitofp_to_f32_lshr24_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -252,7 +237,6 @@ ; GFX11-LABEL: v_uitofp_to_f32_lshr24_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 24 @@ -271,7 +255,6 @@ ; GFX10-LABEL: v_uitofp_i8_to_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -284,7 +267,6 @@ ; GFX11-LABEL: v_uitofp_i8_to_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cvt = uitofp i8 %arg0 to float @@ -303,7 +285,6 @@ ; GFX10-LABEL: v_uitofp_v2i8_to_v2f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -320,7 +301,6 @@ ; GFX11-LABEL: v_uitofp_v2i8_to_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -344,7 +324,6 @@ ; GFX10-LABEL: v_uitofp_v3i8_to_v3f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, v0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -363,7 +342,6 @@ ; GFX11-LABEL: v_uitofp_v3i8_to_v3f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, v0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -390,7 +368,6 @@ ; GFX10-LABEL: v_uitofp_v4i8_to_v4f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -411,7 +388,6 @@ ; GFX11-LABEL: v_uitofp_v4i8_to_v4f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -438,7 +414,6 @@ ; GFX10-LABEL: v_uitofp_unpack_i32_to_v4f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -459,7 +434,6 @@ ; GFX11-LABEL: v_uitofp_unpack_i32_to_v4f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 ; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 @@ -508,7 +482,6 @@ ; GFX10-LABEL: v_uitofp_i32_to_f16_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -523,7 +496,6 @@ ; GFX11-LABEL: v_uitofp_i32_to_f16_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -552,7 +524,6 @@ ; GFX10-LABEL: v_sitofp_i32_to_f16_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -567,7 +538,6 @@ ; GFX11-LABEL: v_sitofp_i32_to_f16_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -596,7 +566,6 @@ ; GFX10-LABEL: v_uitofp_to_f16_lshr8_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -611,7 +580,6 @@ ; GFX11-LABEL: v_uitofp_to_f16_lshr8_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -641,7 +609,6 @@ ; GFX10-LABEL: v_uitofp_to_f16_lshr16_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v0, v0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -656,7 +623,6 @@ ; GFX11-LABEL: v_uitofp_to_f16_lshr16_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -686,7 +652,6 @@ ; GFX10-LABEL: v_uitofp_to_f16_lshr24_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -701,7 +666,6 @@ ; GFX11-LABEL: v_uitofp_to_f16_lshr24_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -730,7 +694,6 @@ ; GFX10-LABEL: v_uitofp_i8_to_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f16_u16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -743,7 +706,6 @@ ; GFX11-LABEL: v_uitofp_i8_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_u16_e32 v0, v0 @@ -763,7 +725,6 @@ ; GFX10-LABEL: v_uitofp_i32_to_f64_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -778,7 +739,6 @@ ; GFX11-LABEL: v_uitofp_i32_to_f64_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -799,7 +759,6 @@ ; GFX10-LABEL: v_uitofp_to_f64_lshr8_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 8 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -814,7 +773,6 @@ ; GFX11-LABEL: v_uitofp_to_f64_lshr8_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfe_u32 v0, v0, 8, 8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -836,7 +794,6 @@ ; GFX10-LABEL: v_uitofp_to_f64_lshr16_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -851,7 +808,6 @@ ; GFX11-LABEL: v_uitofp_to_f64_lshr16_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -873,7 +829,6 @@ ; GFX10-LABEL: v_uitofp_to_f64_lshr24_mask255: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -888,7 +843,6 @@ ; GFX11-LABEL: v_uitofp_to_f64_lshr24_mask255: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -918,7 +872,6 @@ ; GFX10-LABEL: v_uitofp_i8_to_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX10-NEXT: v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -935,7 +888,6 @@ ; GFX11-LABEL: v_uitofp_i8_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll @@ -171,7 +171,6 @@ ; GCN-LABEL: fmac_sequence_simple: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: v_fma_f32 v2, v2, v3, v4 ; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GCN-NEXT: v_mov_b32_e32 v0, v2 @@ -187,7 +186,6 @@ ; GCN-LABEL: fmac_sequence_innermost_fmul: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: v_mad_f32 v2, v2, v3, v6 ; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GCN-NEXT: v_fmac_f32_e32 v2, v4, v5 @@ -206,7 +204,6 @@ ; GCN-LABEL: fmac_sequence_innermost_fmul_swapped_operands: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: v_mad_f32 v2, v2, v3, v6 ; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1 ; GCN-NEXT: v_fmac_f32_e32 v2, v4, v5 diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll @@ -80,7 +80,6 @@ ; GFX11-LABEL: divergent_vec_0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp = insertelement <2 x i16> undef, i16 0, i32 0 @@ -165,7 +164,6 @@ ; GFX11-LABEL: divergent_vec_i16_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 @@ -250,7 +248,6 @@ ; GFX11-LABEL: divergent_vec_f16_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp = insertelement <2 x half> undef, half %a, i32 0 @@ -350,7 +347,6 @@ ; GFX11-LABEL: divergent_vec_i16_LL: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 @@ -439,7 +435,6 @@ ; GFX11-LABEL: divergent_vec_i16_LH: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %shift = lshr i32 %b, 16 @@ -533,7 +528,6 @@ ; GFX11-LABEL: divergent_vec_i16_HH: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x7060302 ; GFX11-NEXT: s_setpc_b64 s[30:31] %shift_a = lshr i32 %a, 16 @@ -641,7 +635,6 @@ ; GFX11-LABEL: divergent_vec_f16_LL: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp = insertelement <2 x half> undef, half %a, i32 0 @@ -676,7 +669,6 @@ ; GFX11-LABEL: build_vec_v2i16_undeflo_divergent: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16_d16 v0, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll --- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll @@ -45,7 +45,6 @@ ; GFX10-LABEL: s_add_co_select_user: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[4:5], 0 ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -63,7 +62,6 @@ ; GFX11-LABEL: s_add_co_select_user: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll @@ -119,7 +119,6 @@ ; GFX11-LABEL: vec_8xi16_extract_4xi16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB0_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc @@ -283,7 +282,6 @@ ; GFX11-LABEL: vec_8xi16_extract_4xi16_2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB1_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc @@ -450,7 +448,6 @@ ; GFX11-LABEL: vec_8xf16_extract_4xf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB2_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc @@ -652,7 +649,6 @@ ; GFX11-LABEL: vec_16xi16_extract_4xi16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB3_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc @@ -859,7 +855,6 @@ ; GFX11-LABEL: vec_16xi16_extract_4xi16_2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB4_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc @@ -1069,7 +1064,6 @@ ; GFX11-LABEL: vec_16xf16_extract_4xf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_cbranch_scc0 .LBB5_2 ; GFX11-NEXT: ; %bb.1: ; %F ; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc @@ -1156,7 +1150,6 @@ ; GFX11-LABEL: large_vector: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshl_add_u32 v2, v1, 5, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v2 offset1:1 ; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3 diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -37,7 +37,6 @@ ; GFX10-LABEL: global_load_2xi16_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -45,7 +44,6 @@ ; GFX11-LABEL: global_load_2xi16_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -163,7 +161,6 @@ ; GFX10-LABEL: global_load_2xi16_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -171,7 +168,6 @@ ; GFX11-LABEL: global_load_2xi16_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -282,7 +278,6 @@ ; GFX10-LABEL: global_load_2xi16_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -290,7 +285,6 @@ ; GFX11-LABEL: global_load_2xi16_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll @@ -46,7 +46,6 @@ ; GFX10-LABEL: private_load_2xi16_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -54,7 +53,6 @@ ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align2: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -62,7 +60,6 @@ ; GFX11-LABEL: private_load_2xi16_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -70,7 +67,6 @@ ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align2: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -124,37 +120,29 @@ ; GFX10-LABEL: private_store_2xi16_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align2: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: private_store_2xi16_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: scratch_store_b32 v1, v0, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align2: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 store i16 1, ptr addrspace(5) %r, align 2 @@ -210,7 +198,6 @@ ; GFX10-LABEL: private_load_2xi16_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -218,7 +205,6 @@ ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align1: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -226,7 +212,6 @@ ; GFX11-LABEL: private_load_2xi16_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -234,7 +219,6 @@ ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align1: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -293,37 +277,29 @@ ; GFX10-LABEL: private_store_2xi16_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align1: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: private_store_2xi16_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: scratch_store_b32 v1, v0, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align1: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 store i16 1, ptr addrspace(5) %r, align 1 @@ -364,7 +340,6 @@ ; GFX10-LABEL: private_load_2xi16_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -372,7 +347,6 @@ ; GFX10-FLASTSCR-LABEL: private_load_2xi16_align4: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -380,7 +354,6 @@ ; GFX11-LABEL: private_load_2xi16_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -388,7 +361,6 @@ ; GFX11-FLASTSCR-LABEL: private_load_2xi16_align4: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] @@ -439,37 +411,29 @@ ; GFX10-LABEL: private_store_2xi16_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-FLASTSCR-LABEL: private_store_2xi16_align4: ; GFX10-FLASTSCR: ; %bb.0: ; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off -; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: private_store_2xi16_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: scratch_store_b32 v1, v0, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FLASTSCR-LABEL: private_store_2xi16_align4: ; GFX11-FLASTSCR: ; %bb.0: ; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off -; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31] %gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1 store i16 1, ptr addrspace(5) %r, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -196,7 +196,6 @@ ; GFX11-LABEL: v_test_canonicalize_build_vector_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 @@ -2268,7 +2267,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v3f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2311,7 +2309,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2387,7 +2384,6 @@ ; GFX11-LABEL: v_test_canonicalize_reg_undef_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pack_b32_f16 v0, v0, 0 @@ -2422,7 +2418,6 @@ ; GFX11-LABEL: v_test_canonicalize_undef_reg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -2455,7 +2450,6 @@ ; GFX11-LABEL: v_test_canonicalize_undef_lo_imm_hi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfrev_b32_e32 v0, 60 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vec = insertelement <2 x half> undef, half 1.0, i32 1 @@ -2486,7 +2480,6 @@ ; GFX11-LABEL: v_test_canonicalize_imm_lo_undef_hi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vec = insertelement <2 x half> undef, half 1.0, i32 0 @@ -2517,7 +2510,6 @@ ; GFX11-LABEL: v_test_canonicalize_undef_lo_k_hi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfrev_b32_e32 v0, 50 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vec = insertelement <2 x half> undef, half 16.0, i32 1 @@ -2548,7 +2540,6 @@ ; GFX11-LABEL: v_test_canonicalize_k_lo_undef_hi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4c00 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vec = insertelement <2 x half> undef, half 16.0, i32 0 @@ -2582,7 +2573,6 @@ ; GFX11-LABEL: v_test_canonicalize_reg_k_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pack_b32_f16 v0, v0, 2.0 @@ -2619,7 +2609,6 @@ ; GFX11-LABEL: v_test_canonicalize_k_reg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pack_b32_f16 v0, 2.0, v0 @@ -2708,7 +2697,6 @@ ; GFX11-LABEL: v_test_canonicalize_reg_undef_undef_undef_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x7e007e00 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -2754,7 +2742,6 @@ ; GFX11-LABEL: v_test_canonicalize_reg_reg_undef_undef_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x7e007e00 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -2805,7 +2792,6 @@ ; GFX11-LABEL: v_test_canonicalize_reg_undef_reg_reg_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -2862,7 +2848,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v6f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 @@ -2922,7 +2907,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v8f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 @@ -2999,7 +2983,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v12f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 @@ -3094,7 +3077,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v16f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 @@ -3257,7 +3239,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v32f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-NEXT: v_pk_max_f16 v2, v2, v2 @@ -3687,7 +3668,6 @@ ; GFX11-LABEL: v_test_canonicalize_var_v64f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -2207,7 +2207,6 @@ ; GFX11-LABEL: v_test_canonicalize_v2f32_flush: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg) @@ -2235,7 +2234,6 @@ ; GFX11-LABEL: v_test_canonicalize_v3f32_flush: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2266,7 +2264,6 @@ ; GFX11-LABEL: v_test_canonicalize_v4f32_flush: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2305,7 +2302,6 @@ ; GFX11-LABEL: v_test_canonicalize_v8f32_flush: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 ; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 @@ -2333,7 +2329,6 @@ ; GFX11-LABEL: v_test_canonicalize_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2361,7 +2356,6 @@ ; GFX11-LABEL: v_test_canonicalize_v3f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] @@ -2392,7 +2386,6 @@ ; GFX11-LABEL: v_test_canonicalize_v4f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -658,7 +658,6 @@ ; GFX11-LABEL: v_copysign_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half %sign) @@ -688,7 +687,6 @@ ; GFX11-LABEL: v_test_copysign_f16_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half 0.0) @@ -718,7 +716,6 @@ ; GFX11-LABEL: v_test_copysign_f16_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half 1.0) @@ -748,7 +745,6 @@ ; GFX11-LABEL: v_test_copysign_f16_10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half 10.0) @@ -778,7 +774,6 @@ ; GFX11-LABEL: v_test_copysign_f16_neg1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half -1.0) @@ -808,7 +803,6 @@ ; GFX11-LABEL: v_test_copysign_f16_neg10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call half @llvm.copysign.f16(half %mag, half -10.0) diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll --- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll @@ -679,7 +679,6 @@ ; GFX11-LABEL: v_test_copysign_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float %sign) @@ -696,7 +695,6 @@ ; GFX11-LABEL: v_test_copysign_f32_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float 0.0) @@ -713,7 +711,6 @@ ; GFX11-LABEL: v_test_copysign_f32_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float 1.0) @@ -730,7 +727,6 @@ ; GFX11-LABEL: v_test_copysign_f32_10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float 10.0) @@ -747,7 +743,6 @@ ; GFX11-LABEL: v_test_copysign_f32_neg1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float -1.0) @@ -764,7 +759,6 @@ ; GFX11-LABEL: v_test_copysign_f32_neg10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.copysign.f32(float %mag, float -10.0) @@ -783,7 +777,6 @@ ; GFX11-LABEL: v_test_copysign_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v2 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -802,7 +795,6 @@ ; GFX11-LABEL: v_test_copysign_v2f32_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -821,7 +813,6 @@ ; GFX11-LABEL: v_test_copysign_v2f32_neg1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -842,7 +833,6 @@ ; GFX11-LABEL: v_test_copysign_v3f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v3 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4 ; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v5 @@ -865,7 +855,6 @@ ; GFX11-LABEL: v_test_copysign_v4f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v4 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v5 ; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v6 @@ -890,7 +879,6 @@ ; GFX11-LABEL: v_test_copysign_v5f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v5 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v6 ; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v7 diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll --- a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll @@ -854,7 +854,6 @@ ; GFX11-LABEL: v_test_copysign_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v10 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v21 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -873,7 +872,6 @@ ; GFX11-LABEL: v_test_copysign_f64_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.copysign.f64(double %mag, double 0.0) @@ -891,7 +889,6 @@ ; GFX11-LABEL: v_test_copysign_f64_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.copysign.f64(double %mag, double 1.0) @@ -909,7 +906,6 @@ ; GFX11-LABEL: v_test_copysign_f64_10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.copysign.f64(double %mag, double 10.0) @@ -927,7 +923,6 @@ ; GFX11-LABEL: v_test_copysign_f64_neg1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v10 ; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -946,7 +941,6 @@ ; GFX11-LABEL: v_test_copysign_f64_neg10: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v10 ; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -966,7 +960,6 @@ ; GFX11-LABEL: v_test_copysign_f64_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v10 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v20 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -996,7 +989,6 @@ ; GFX11-LABEL: v_test_copysign_f64_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_lshlrev_b32 v1, 16, v20 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v1 @@ -1020,7 +1012,6 @@ ; GFX11-LABEL: v_test_copysign_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v7 ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v9 @@ -1046,7 +1037,6 @@ ; GFX11-LABEL: v_test_copysign_v3f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v9 ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v11 @@ -1076,7 +1066,6 @@ ; GFX11-LABEL: v_test_copysign_v4f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v11 ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v13 diff --git a/llvm/test/CodeGen/AMDGPU/fexp.ll b/llvm/test/CodeGen/AMDGPU/fexp.ll --- a/llvm/test/CodeGen/AMDGPU/fexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fexp.ll @@ -15,7 +15,6 @@ ; GFX11-LABEL: v_exp_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 @@ -37,7 +36,6 @@ ; GFX11-LABEL: v_exp_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x3fb8aa3b, v0 :: v_dual_mul_f32 v1, 0x3fb8aa3b, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 @@ -62,7 +60,6 @@ ; GFX11-LABEL: v_exp_v3f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x3fb8aa3b, v0 :: v_dual_mul_f32 v1, 0x3fb8aa3b, v1 ; GFX11-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -92,7 +89,6 @@ ; GFX11-LABEL: v_exp_v4f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x3fb8aa3b, v0 :: v_dual_mul_f32 v1, 0x3fb8aa3b, v1 ; GFX11-NEXT: v_dual_mul_f32 v2, 0x3fb8aa3b, v2 :: v_dual_mul_f32 v3, 0x3fb8aa3b, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -133,7 +129,6 @@ ; GFX11-LABEL: v_exp_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_exp_f16_e32 v0, v0 @@ -180,7 +175,6 @@ ; GFX11-LABEL: v_exp_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_mul_f16 v0, 0x3dc5, v0 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -255,7 +249,6 @@ ; GFX11-LABEL: v_exp_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -192,7 +192,6 @@ ; GFX10-LABEL: zero_init_foo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s0, 0 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 @@ -205,13 +204,11 @@ ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: zero_init_foo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 @@ -224,7 +221,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: zero_init_foo: @@ -264,7 +260,6 @@ ; GFX10-PAL-LABEL: zero_init_foo: ; GFX10-PAL: ; %bb.0: ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_mov_b32 s0, 0 ; GFX10-PAL-NEXT: s_mov_b32 s1, s0 ; GFX10-PAL-NEXT: s_mov_b32 s2, s0 @@ -277,13 +272,11 @@ ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: zero_init_foo: ; GFX11-PAL: ; %bb.0: ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 ; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 @@ -296,7 +289,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %alloca = alloca [32 x i16], align 2, addrspace(5) call void @llvm.memset.p5.i64(ptr addrspace(5) align 2 dereferenceable(64) %alloca, i8 0, i64 64, i1 false) @@ -716,7 +708,6 @@ ; GFX10-LABEL: store_load_vindex_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s32 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 @@ -730,7 +721,6 @@ ; GFX11-LABEL: store_load_vindex_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s32 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -772,7 +762,6 @@ ; GFX10-PAL-LABEL: store_load_vindex_foo: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s32 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 @@ -786,7 +775,6 @@ ; GFX11-PAL-LABEL: store_load_vindex_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s32 ; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -819,20 +807,16 @@ ; GFX10-LABEL: private_ptr_foo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000 ; GFX10-NEXT: scratch_store_dword v0, v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: private_ptr_foo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0 ; GFX11-NEXT: scratch_store_b32 v0, v1, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: private_ptr_foo: @@ -856,20 +840,16 @@ ; GFX10-PAL-LABEL: private_ptr_foo: ; GFX10-PAL: ; %bb.0: ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, 4, v0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 0x41200000 ; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: private_ptr_foo: ; GFX11-PAL: ; %bb.0: ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1 store float 1.000000e+01, ptr addrspace(5) %gep, align 4 @@ -1081,7 +1061,6 @@ ; GFX10-LABEL: zero_init_small_offset_foo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 @@ -1096,13 +1075,11 @@ ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: zero_init_small_offset_foo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 @@ -1117,7 +1094,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: zero_init_small_offset_foo: @@ -1161,7 +1137,6 @@ ; GFX10-PAL-LABEL: zero_init_small_offset_foo: ; GFX10-PAL: ; %bb.0: ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_mov_b32 s0, 0 @@ -1176,13 +1151,11 @@ ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: zero_init_small_offset_foo: ; GFX11-PAL: ; %bb.0: ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 @@ -1197,7 +1170,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304 -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %padding = alloca [64 x i32], align 4, addrspace(5) %alloca = alloca [32 x i16], align 2, addrspace(5) @@ -1758,7 +1730,6 @@ ; GFX10-LABEL: store_load_vindex_small_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 @@ -1776,7 +1747,6 @@ ; GFX11-LABEL: store_load_vindex_small_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-NEXT: s_add_i32 s0, s32, 0x100 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc @@ -1826,7 +1796,6 @@ ; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 @@ -1844,7 +1813,6 @@ ; GFX11-PAL-LABEL: store_load_vindex_small_offset_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x100 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 glc dlc @@ -2086,7 +2054,6 @@ ; GFX10-LABEL: zero_init_large_offset_foo: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 @@ -2105,13 +2072,11 @@ ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: zero_init_large_offset_foo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 @@ -2129,7 +2094,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: zero_init_large_offset_foo: @@ -2181,7 +2145,6 @@ ; GFX1010-PAL-LABEL: zero_init_large_offset_foo: ; GFX1010-PAL: ; %bb.0: ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 @@ -2203,13 +2166,11 @@ ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 -; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-PAL-LABEL: zero_init_large_offset_foo: ; GFX1030-PAL: ; %bb.0: ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 @@ -2228,13 +2189,11 @@ ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 -; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: zero_init_large_offset_foo: ; GFX11-PAL: ; %bb.0: ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 @@ -2252,7 +2211,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %padding = alloca [4096 x i32], align 4, addrspace(5) %alloca = alloca [32 x i16], align 2, addrspace(5) @@ -2816,7 +2774,6 @@ ; GFX10-LABEL: store_load_vindex_large_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 @@ -2834,7 +2791,6 @@ ; GFX11-LABEL: store_load_vindex_large_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc @@ -2886,7 +2842,6 @@ ; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 @@ -2904,7 +2859,6 @@ ; GFX11-PAL-LABEL: store_load_vindex_large_offset_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc @@ -3107,7 +3061,6 @@ ; GFX10-LABEL: store_load_large_imm_offset_foo: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 @@ -3124,7 +3077,6 @@ ; GFX11-LABEL: store_load_large_imm_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 ; GFX11-NEXT: s_movk_i32 s0, 0x3000 ; GFX11-NEXT: s_add_i32 s1, s32, 4 @@ -3173,7 +3125,6 @@ ; GFX10-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 @@ -3190,7 +3141,6 @@ ; GFX11-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 ; GFX11-PAL-NEXT: s_movk_i32 s0, 0x3000 ; GFX11-PAL-NEXT: s_add_i32 s1, s32, 4 @@ -3364,7 +3314,6 @@ ; GFX10-LABEL: store_load_i64_aligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -3376,7 +3325,6 @@ ; GFX11-LABEL: store_load_i64_aligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3409,7 +3357,6 @@ ; GFX10-PAL-LABEL: store_load_i64_aligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-PAL-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -3421,7 +3368,6 @@ ; GFX11-PAL-LABEL: store_load_i64_aligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; GFX11-PAL-NEXT: scratch_store_b64 v0, v[1:2], off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3449,7 +3395,6 @@ ; GFX10-LABEL: store_load_i64_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -3461,7 +3406,6 @@ ; GFX11-LABEL: store_load_i64_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3494,7 +3438,6 @@ ; GFX10-PAL-LABEL: store_load_i64_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-PAL-NEXT: scratch_store_dwordx2 v0, v[1:2], off @@ -3506,7 +3449,6 @@ ; GFX11-PAL-LABEL: store_load_i64_unaligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; GFX11-PAL-NEXT: scratch_store_b64 v0, v[1:2], off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3535,7 +3477,6 @@ ; GFX10-LABEL: store_load_v3i32_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-NEXT: v_mov_b32_e32 v2, 2 ; GFX10-NEXT: v_mov_b32_e32 v3, 3 @@ -3548,7 +3489,6 @@ ; GFX11-LABEL: store_load_v3i32_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 ; GFX11-NEXT: v_mov_b32_e32 v3, 3 ; GFX11-NEXT: scratch_store_b96 v0, v[1:3], off dlc @@ -3584,7 +3524,6 @@ ; GFX10-PAL-LABEL: store_load_v3i32_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 2 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 3 @@ -3597,7 +3536,6 @@ ; GFX11-PAL-LABEL: store_load_v3i32_unaligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 ; GFX11-PAL-NEXT: v_mov_b32_e32 v3, 3 ; GFX11-PAL-NEXT: scratch_store_b96 v0, v[1:3], off dlc @@ -3628,7 +3566,6 @@ ; GFX10-LABEL: store_load_v4i32_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-NEXT: v_mov_b32_e32 v2, 2 ; GFX10-NEXT: v_mov_b32_e32 v3, 3 @@ -3642,7 +3579,6 @@ ; GFX11-LABEL: store_load_v4i32_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 ; GFX11-NEXT: v_dual_mov_b32 v3, 3 :: v_dual_mov_b32 v4, 4 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off dlc @@ -3680,7 +3616,6 @@ ; GFX10-PAL-LABEL: store_load_v4i32_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 2 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 3 @@ -3694,7 +3629,6 @@ ; GFX11-PAL-LABEL: store_load_v4i32_unaligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 ; GFX11-PAL-NEXT: v_dual_mov_b32 v3, 3 :: v_dual_mov_b32 v4, 4 ; GFX11-PAL-NEXT: scratch_store_b128 v0, v[1:4], off dlc @@ -3723,7 +3657,6 @@ ; GFX10-LABEL: store_load_i32_negative_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-NEXT: scratch_store_byte v0, v1, off @@ -3735,7 +3668,6 @@ ; GFX11-LABEL: store_load_i32_negative_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, -1, v0 ; GFX11-NEXT: scratch_store_b8 v0, v1, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3768,7 +3700,6 @@ ; GFX10-PAL-LABEL: store_load_i32_negative_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-PAL-NEXT: scratch_store_byte v0, v1, off @@ -3780,7 +3711,6 @@ ; GFX11-PAL-LABEL: store_load_i32_negative_unaligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, -1, v0 ; GFX11-PAL-NEXT: scratch_store_b8 v0, v1, off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3809,7 +3739,6 @@ ; GFX10-LABEL: store_load_i32_large_negative_unaligned: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffef7f, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-NEXT: scratch_store_byte v0, v1, off @@ -3821,7 +3750,6 @@ ; GFX11-LABEL: store_load_i32_large_negative_unaligned: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, 0xffffef7f, v0 ; GFX11-NEXT: scratch_store_b8 v0, v1, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3854,7 +3782,6 @@ ; GFX10-PAL-LABEL: store_load_i32_large_negative_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, 0xffffef7f, v0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1 ; GFX10-PAL-NEXT: scratch_store_byte v0, v1, off @@ -3866,7 +3793,6 @@ ; GFX11-PAL-LABEL: store_load_i32_large_negative_unaligned: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, 0xffffef7f, v0 ; GFX11-PAL-NEXT: scratch_store_b8 v0, v1, off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -17,7 +17,6 @@ ; GFX10-LABEL: test_fma: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half %y, half %z) @@ -35,7 +34,6 @@ ; GFX10-LABEL: test_fmac: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fmac_f16_e32 v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %y, half %z, half %x) @@ -61,7 +59,6 @@ ; GFX10-LABEL: test_fmaak: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200) @@ -87,7 +84,6 @@ ; GFX10-LABEL: test_fmamk: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z) @@ -112,7 +108,6 @@ ; GFX10-SDAG-LABEL: test_D139469_f16: ; GFX10-SDAG: ; %bb.0: ; %bb ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e ; GFX10-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0 ; GFX10-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 @@ -125,7 +120,6 @@ ; GFX10-GISEL-LABEL: test_D139469_f16: ; GFX10-GISEL: ; %bb.0: ; %bb ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x291e ; GFX10-GISEL-NEXT: v_mul_f16_e32 v1, 0x291e, v0 ; GFX10-GISEL-NEXT: v_fmaak_f16 v0, s4, v0, 0x211e @@ -184,7 +178,6 @@ ; GFX10-SDAG-LABEL: test_D139469_v2f16: ; GFX10-SDAG: ; %bb.0: ; %bb ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x211e ; GFX10-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] ; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s4 op_sel_hi:[0,1,0] @@ -202,7 +195,6 @@ ; GFX10-GISEL-LABEL: test_D139469_v2f16: ; GFX10-GISEL: ; %bb.0: ; %bb ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b32 s4, 0x291e291e ; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v0, 0x291e op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v0, s4, 0x211e op_sel_hi:[1,1,0] diff --git a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll --- a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll @@ -34,14 +34,12 @@ ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] ; ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_f32: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_fmac_f32_e32 v0, v1, v0 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -54,7 +52,6 @@ ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] %add = fadd fast float %arg1, 1.0 @@ -79,14 +76,12 @@ ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] ; ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_f32: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_fma_f32 v0, -v1, v0, v0 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -99,7 +94,6 @@ ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] %add = fsub fast float 1.0, %arg1 @@ -127,7 +121,6 @@ ; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0 ; FMAGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] @@ -135,7 +128,6 @@ ; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_dual_fmac_f32 v0, v2, v0 :: v_dual_fmac_f32 v1, v3, v1 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -149,7 +141,6 @@ ; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0 ; FMADGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] @@ -178,7 +169,6 @@ ; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0 ; FMAGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] @@ -186,7 +176,6 @@ ; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_fma_f32 v0, -v2, v0, v0 ; FMAGFX11-NEXT: v_fma_f32 v1, -v3, v1, v1 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] @@ -201,7 +190,6 @@ ; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0 ; FMADGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] @@ -227,14 +215,12 @@ ; FMAGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fma_f32 v0, v0, v1, v1 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] ; ; FMAGFX11-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_fma_f32 v0, v0, v1, v1 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -247,7 +233,6 @@ ; FMADGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_mad_f32 v0, v0, v1, v1 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] %add = fadd fast float %arg0, 1.0 @@ -273,14 +258,12 @@ ; FMAGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: ; FMAGFX10: ; %bb.0: ; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX10-NEXT: v_fma_f32 v0, -v0, v1, v1 ; FMAGFX10-NEXT: s_setpc_b64 s[30:31] ; ; FMAGFX11-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: ; FMAGFX11: ; %bb.0: ; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; FMAGFX11-NEXT: v_fma_f32 v0, -v0, v1, v1 ; FMAGFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -293,7 +276,6 @@ ; FMADGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: ; FMADGFX10: ; %bb.0: ; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FMADGFX10-NEXT: v_mad_f32 v0, -v0, v1, v1 ; FMADGFX10-NEXT: s_setpc_b64 s[30:31] %sub = fsub fast float 1.0, %arg0 diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll --- a/llvm/test/CodeGen/AMDGPU/fmax3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll @@ -593,7 +593,6 @@ ; GFX11-LABEL: no_fmax3_v2f16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_pk_max_f16 v0, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll @@ -61,7 +61,6 @@ ; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v1 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31] @@ -69,7 +68,6 @@ ; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ugt half %a, %b @@ -151,7 +149,6 @@ ; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -165,7 +162,6 @@ ; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v2f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v1 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ugt <2 x half> %a, %b @@ -263,7 +259,6 @@ ; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v3f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0 ; GFX11-SAFE-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v2 @@ -279,7 +274,6 @@ ; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v3f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v2 ; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] @@ -401,7 +395,6 @@ ; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v4f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2 @@ -423,7 +416,6 @@ ; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v4f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v2 ; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v3 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] @@ -623,7 +615,6 @@ ; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v8f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6 @@ -659,7 +650,6 @@ ; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v8f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v4 ; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v5 ; GFX11-NNAN-NEXT: v_pk_max_f16 v2, v2, v6 diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll @@ -62,7 +62,6 @@ ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31] @@ -70,7 +69,6 @@ ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule half %a, %b @@ -152,7 +150,6 @@ ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -166,7 +163,6 @@ ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v2f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v1 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule <2 x half> %a, %b @@ -264,7 +260,6 @@ ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v3f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0 ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 @@ -280,7 +275,6 @@ ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v3f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] @@ -402,7 +396,6 @@ ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v4f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2 @@ -424,7 +417,6 @@ ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v4f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3 ; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31] @@ -624,7 +616,6 @@ ; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v8f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3 ; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6 @@ -660,7 +651,6 @@ ; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v8f16: ; GFX11-NNAN: ; %bb.0: ; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v4 ; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v5 ; GFX11-NNAN-NEXT: v_pk_min_f16 v2, v2, v6 diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll @@ -50,7 +50,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -59,7 +58,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %add = fadd half %a, %b @@ -89,7 +87,6 @@ ; GFX11-LABEL: v_fneg_add_store_use_add_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_f16_e32 v1, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1 @@ -143,7 +140,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_multi_use_add_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_add_f16_e32 v1, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v1 @@ -153,7 +149,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_multi_use_add_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0 @@ -205,7 +200,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_fneg_x_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v1, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -214,7 +208,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_fneg_x_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -261,7 +254,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_x_fneg_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -270,7 +262,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_x_fneg_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v1, v0 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.b = fneg half %b @@ -317,7 +308,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_fneg_fneg_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_sub_f16_e64 v0, -v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -326,7 +316,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_fneg_fneg_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -380,7 +369,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1 @@ -391,7 +379,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v2, v0, v1 ; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -455,7 +442,6 @@ ; GFX11-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v3, 0x8000, v1 @@ -466,7 +452,6 @@ ; GFX11-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v3, v0, v1 ; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v2 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -734,7 +719,6 @@ ; GFX11-LABEL: v_fneg_mul_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %mul = fmul half %a, %b @@ -764,7 +748,6 @@ ; GFX11-LABEL: v_fneg_mul_store_use_mul_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v1, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1 @@ -798,7 +781,6 @@ ; GFX11-LABEL: v_fneg_mul_multi_use_mul_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v0 @@ -831,7 +813,6 @@ ; GFX11-LABEL: v_fneg_mul_fneg_x_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -860,7 +841,6 @@ ; GFX11-LABEL: v_fneg_mul_x_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fneg.b = fneg half %b @@ -889,7 +869,6 @@ ; GFX11-LABEL: v_fneg_mul_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -922,7 +901,6 @@ ; GFX11-LABEL: v_fneg_mul_store_use_fneg_x_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v0, v1 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -962,7 +940,6 @@ ; GFX11-LABEL: v_fneg_mul_multi_use_fneg_x_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v0, v1 ; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1005,7 +982,6 @@ ; GFX11-LABEL: v_fneg_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1036,7 +1012,6 @@ ; GFX11-LABEL: v_fneg_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half %a, half %b) @@ -1060,7 +1035,6 @@ ; GFX11-LABEL: v_fneg_self_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half %a, half %a) @@ -1084,7 +1058,6 @@ ; GFX11-LABEL: v_fneg_self_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half %a, half %a) @@ -1112,7 +1085,6 @@ ; GFX11-LABEL: v_fneg_posk_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, -4.0, v0 @@ -1140,7 +1112,6 @@ ; GFX11-LABEL: v_fneg_posk_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -4.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half 4.0, half %a) @@ -1168,7 +1139,6 @@ ; GFX11-LABEL: v_fneg_negk_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, 4.0, v0 @@ -1196,7 +1166,6 @@ ; GFX11-LABEL: v_fneg_negk_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, 4.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half -4.0, half %a) @@ -1224,7 +1193,6 @@ ; GFX11-LABEL: v_fneg_0_minnum_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e32 v0, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -1254,7 +1222,6 @@ ; GFX11-LABEL: v_fneg_neg0_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, 0, v0 @@ -1285,7 +1252,6 @@ ; GFX11-LABEL: v_fneg_inv2pi_minnum_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0 @@ -1317,7 +1283,6 @@ ; GFX11-LABEL: v_fneg_neg_inv2pi_minnum_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0 @@ -1346,7 +1311,6 @@ ; GFX11-LABEL: v_fneg_neg0_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %min = call half @llvm.minnum.f16(half -0.0, half %a) @@ -1378,7 +1342,6 @@ ; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 0, v0 @@ -1414,7 +1377,6 @@ ; GFX11-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0 @@ -1448,7 +1410,6 @@ ; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e32 v0, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1 @@ -1485,7 +1446,6 @@ ; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1524,7 +1484,6 @@ ; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0 @@ -1566,7 +1525,6 @@ ; GFX11-LABEL: v_fneg_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1597,7 +1555,6 @@ ; GFX11-LABEL: v_fneg_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e64 v0, -v0, -v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half %a, half %b) @@ -1621,7 +1578,6 @@ ; GFX11-LABEL: v_fneg_self_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half %a, half %a) @@ -1645,7 +1601,6 @@ ; GFX11-LABEL: v_fneg_self_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half %a, half %a) @@ -1673,7 +1628,6 @@ ; GFX11-LABEL: v_fneg_posk_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, -4.0, v0 @@ -1701,7 +1655,6 @@ ; GFX11-LABEL: v_fneg_posk_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e64 v0, -v0, -4.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half 4.0, half %a) @@ -1729,7 +1682,6 @@ ; GFX11-LABEL: v_fneg_negk_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 4.0, v0 @@ -1757,7 +1709,6 @@ ; GFX11-LABEL: v_fneg_negk_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e64 v0, -v0, 4.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half -4.0, half %a) @@ -1785,7 +1736,6 @@ ; GFX11-LABEL: v_fneg_0_maxnum_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -1815,7 +1765,6 @@ ; GFX11-LABEL: v_fneg_neg0_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_f16_e32 v0, 0, v0 @@ -1843,7 +1792,6 @@ ; GFX11-LABEL: v_fneg_neg0_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_f16_e64 v0, -v0, 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %max = call half @llvm.maxnum.f16(half -0.0, half %a) @@ -1875,7 +1823,6 @@ ; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, 0, v0 @@ -1909,7 +1856,6 @@ ; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1 @@ -1946,7 +1892,6 @@ ; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1985,7 +1930,6 @@ ; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0 @@ -2045,7 +1989,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2 @@ -2054,7 +1997,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fma.f16(half %a, half %b, half %c) @@ -2086,7 +2028,6 @@ ; GFX11-LABEL: v_fneg_fma_store_use_fma_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v1, v0, v1, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1 @@ -2144,7 +2085,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2 @@ -2154,7 +2094,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0 @@ -2209,7 +2148,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -2218,7 +2156,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -2269,7 +2206,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -2278,7 +2214,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.b = fneg half %b @@ -2329,7 +2264,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2 @@ -2338,7 +2272,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -2390,7 +2323,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, -v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -2399,7 +2331,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.a = fneg half %a @@ -2451,7 +2382,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -2460,7 +2390,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fneg.c = fneg half %c @@ -2517,7 +2446,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1 @@ -2528,7 +2456,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2 ; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -2595,7 +2522,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1 @@ -2606,7 +2532,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2 ; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v3 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -2667,7 +2592,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fmad_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2 @@ -2676,7 +2600,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fmad_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c) @@ -2798,7 +2721,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fmad_v4f32: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -2809,7 +2731,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fmad_v4f32: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1] ; GFX11-NSZ-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1] ; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] @@ -2864,7 +2785,6 @@ ; GFX11-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2 @@ -2874,7 +2794,6 @@ ; GFX11-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0 @@ -2909,7 +2828,6 @@ ; GFX11-LABEL: v_fneg_fp_extend_f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -2937,7 +2855,6 @@ ; GFX11-LABEL: v_fneg_fp_extend_fneg_f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 @@ -2969,7 +2886,6 @@ ; GFX11-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v2 @@ -3005,7 +2921,6 @@ ; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v0 @@ -3042,7 +2957,6 @@ ; GFX11-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 @@ -3075,7 +2989,6 @@ ; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 @@ -3192,7 +3105,6 @@ ; GFX11-LABEL: v_fneg_fp_round_f64_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0 @@ -3340,7 +3252,6 @@ ; GFX11-LABEL: v_fneg_fp_round_fneg_f64_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1 ; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11 @@ -3496,7 +3407,6 @@ ; GFX11-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_or_b32 v2, 0x1ff, v1, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 ; GFX11-NEXT: v_bfe_u32 v4, v1, 20, 11 @@ -3655,7 +3565,6 @@ ; GFX11-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_or_b32 v4, 0x1ff, v1, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; GFX11-NEXT: v_bfe_u32 v6, v1, 20, 11 @@ -3810,7 +3719,6 @@ ; GFX11-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1 ; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11 @@ -3885,7 +3793,6 @@ ; GFX11-LABEL: v_fneg_trunc_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_trunc_f16_e64 v0, -v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %trunc = call half @llvm.trunc.f16(half %a) @@ -3957,7 +3864,6 @@ ; GFX11-SAFE-LABEL: v_fneg_round_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_trunc_f16_e32 v1, v0 ; GFX11-SAFE-NEXT: s_movk_i32 s0, 0x3c00 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -3974,7 +3880,6 @@ ; GFX11-NSZ-LABEL: v_fneg_round_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_trunc_f16_e32 v1, v0 ; GFX11-NSZ-NEXT: s_movk_i32 s0, 0x3c00 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -4012,7 +3917,6 @@ ; GFX11-LABEL: v_fneg_rint_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rndne_f16_e64 v0, -v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %rint = call half @llvm.rint.f16(half %a) @@ -4042,7 +3946,6 @@ ; GFX11-LABEL: v_fneg_nearbyint_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_rndne_f16_e64 v0, -v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %nearbyint = call half @llvm.nearbyint.f16(half %a) @@ -4076,7 +3979,6 @@ ; GFX11-LABEL: v_fneg_sin_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, 0xb118, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_sin_f16_e32 v0, v0 @@ -4106,7 +4008,6 @@ ; GFX11-LABEL: v_fneg_canonicalize_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %trunc = call half @llvm.canonicalize.f16(half %a) @@ -4174,7 +4075,6 @@ ; GFX11-LABEL: v_fneg_copytoreg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v6, 0x3ff, v31 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_mov_b32 s0, exec_lo @@ -4243,7 +4143,6 @@ ; GFX11-LABEL: v_fneg_inlineasm_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v0 @@ -4288,7 +4187,6 @@ ; GFX11-LABEL: v_fneg_inlineasm_multi_use_src_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0 @@ -4336,7 +4234,6 @@ ; GFX11-LABEL: multiuse_fneg_2_vop3_users_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v3, -v0, v1, v2 ; GFX11-NEXT: v_fma_f16 v1, -v0, v2, 2.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -4377,7 +4274,6 @@ ; GFX11-LABEL: multiuse_fneg_2_vop2_users_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v3, -v0, v1 ; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -4416,7 +4312,6 @@ ; GFX11-LABEL: multiuse_fneg_vop2_vop3_users_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, -v2, v3, 2.0 ; GFX11-NEXT: v_mul_f16_e64 v1, -v2, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -4487,7 +4382,6 @@ ; GFX11-SAFE-LABEL: free_fold_src_code_size_cost_use_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fma_f16 v1, v2, v3, 2.0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SAFE-NEXT: v_mul_f16_e64 v0, -v1, v4 @@ -4497,7 +4391,6 @@ ; GFX11-NSZ-LABEL: free_fold_src_code_size_cost_use_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v1, v2, -v3, -2.0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, v1, v4 @@ -4543,7 +4436,6 @@ ; GFX11-LABEL: one_use_cost_to_fold_into_src_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_trunc_f16_e32 v0, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_fma_f16 v0, -v0, v3, v4 @@ -4586,7 +4478,6 @@ ; GFX11-LABEL: multi_use_cost_to_fold_into_src: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_trunc_f16_e32 v1, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_fma_f16 v0, -v1, v3, v4 @@ -4657,7 +4548,6 @@ ; GFX11-LABEL: fneg_fma_fneg_dagcombine_loop: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v1, v1, v2, 0x8000 op_sel_hi:[1,1,0] neg_lo:[0,1,0] neg_hi:[0,1,0] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_pk_add_f16 v0, v1, v0 neg_lo:[0,1] neg_hi:[0,1] @@ -4693,7 +4583,6 @@ ; GFX11-LABEL: nnan_fmul_neg1_to_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %mul = fmul half %x, -1.0 @@ -4723,7 +4612,6 @@ ; GFX11-LABEL: denormal_fmul_neg1_to_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %mul = fmul nnan half %x, -1.0 @@ -4755,7 +4643,6 @@ ; GFX11-LABEL: denorm_snan_fmul_neg1_to_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 @@ -4787,7 +4674,6 @@ ; GFX11-LABEL: flush_snan_fmul_neg1_to_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 @@ -4824,7 +4710,6 @@ ; GFX11-LABEL: fadd_select_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4877,7 +4762,6 @@ ; GFX11-LABEL: fadd_select_fneg_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll --- a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll @@ -21,7 +21,6 @@ ; GFX10-LABEL: testfn: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[0:1] ; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] ; GFX10-NEXT: v_add_f64 v[2:3], -v[2:3], -v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll --- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll @@ -15,7 +15,6 @@ ; GFX11-LABEL: fneg_xor_select_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -41,7 +40,6 @@ ; GFX11-LABEL: fneg_xor_select_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) @@ -81,14 +79,12 @@ ; GFX11-LABEL: fneg_xor_select_i32_multi_use: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 ; GFX11-NEXT: global_store_b32 v[3:4], v1, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %select = select i1 %cond, i32 %arg0, i32 %arg1 store i32 %select, ptr addrspace(1) %ptr @@ -109,7 +105,6 @@ ; GFX11-LABEL: fneg_xor_select_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -138,7 +133,6 @@ ; GFX11-LABEL: fneg_xor_select_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -167,7 +161,6 @@ ; GFX11-LABEL: fneg_xor_select_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -216,7 +209,6 @@ ; GFX11-LABEL: fneg_xor_select_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 @@ -261,14 +253,12 @@ ; GFX11-LABEL: fneg_xor_select_i16_multi_use: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v1 ; GFX11-NEXT: global_store_b16 v[3:4], v1, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %select = select i1 %cond, i16 %arg0, i16 %arg1 store i16 %select, ptr addrspace(1) %ptr @@ -306,7 +296,6 @@ ; GFX11-LABEL: fneg_xor_select_i64_multi_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -314,7 +303,6 @@ ; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo ; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %select = select i1 %cond, i64 %arg0, i64 %arg1 store i64 %select, ptr addrspace(1) %ptr @@ -339,7 +327,6 @@ ; GFX11-LABEL: select_fneg_xor_select_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 @@ -374,7 +361,6 @@ ; GFX11-LABEL: select_fneg_select_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) @@ -404,7 +390,6 @@ ; GFX11-LABEL: fneg_xor_select_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -446,7 +431,6 @@ ; GFX11-LABEL: fneg_xor_select_f64_multi_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -455,7 +439,6 @@ ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 ; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %select = select i1 %cond, double %arg0, double %arg1 store double %select, ptr addrspace(1) %ptr @@ -477,7 +460,6 @@ ; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 @@ -509,7 +491,6 @@ ; GFX11-LABEL: select_fneg_select_fneg_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 @@ -548,7 +529,6 @@ ; GFX11-LABEL: select_fneg_xor_select_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 @@ -601,7 +581,6 @@ ; GFX11-LABEL: select_fneg_select_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 @@ -638,7 +617,6 @@ ; GFX11-LABEL: select_fneg_xor_select_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 @@ -724,7 +702,6 @@ ; GFX11-LABEL: select_fneg_select_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v3 @@ -810,7 +787,6 @@ ; GFX11-LABEL: select_fneg_xor_select_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v3 @@ -862,7 +838,6 @@ ; GFX11-LABEL: cospiD_pattern0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v5, 1, v0 ; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -903,7 +878,6 @@ ; GFX11-LABEL: cospiD_pattern1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v5, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 @@ -959,7 +933,6 @@ ; GFX11-LABEL: cospiD_pattern0_half: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v0 ; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo @@ -1011,7 +984,6 @@ ; GFX11-LABEL: cospiD_pattern1_half: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3 @@ -1038,7 +1010,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_vector_i64_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bitcast = bitcast i64 %arg to double @@ -1056,7 +1027,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bitcast = bitcast <2 x i32> %arg to double @@ -1074,7 +1044,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bitcast = bitcast <2 x float> %arg to double @@ -1104,7 +1073,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bitcast = bitcast <4 x i16> %arg to double @@ -1136,7 +1104,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %bitcast = bitcast <4 x half> %arg to double @@ -1154,7 +1121,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0 @@ -1174,7 +1140,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0 @@ -1209,7 +1174,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1251,7 +1215,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1289,7 +1252,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_perm_b32 v2, v2, v3, 0x3020706 ; GFX11-NEXT: v_perm_b32 v0, v0, v1, 0x3020706 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1314,7 +1276,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0 @@ -1338,7 +1299,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3] ; GFX11-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1366,7 +1326,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] @@ -1390,7 +1349,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_f32_e32 v1, -2.0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fadd = fadd nsz nnan float %elt1, 2.0 @@ -1421,11 +1379,9 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0 %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1 @@ -1447,7 +1403,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1 ; GFX11-NEXT: v_dual_add_f32 v2, v0, v2 :: v_dual_add_f32 v3, v1, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1475,7 +1430,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3 @@ -1500,7 +1454,6 @@ ; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll --- a/llvm/test/CodeGen/AMDGPU/fneg.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.ll @@ -114,7 +114,6 @@ ; FUNC-LABEL: {{^}}v_fneg_i32: ; GCN: s_waitcnt -; GFX11: s_waitcnt_vscnt ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 define i32 @v_fneg_i32(i32 %in) { @@ -135,7 +134,6 @@ ; FUNC-LABEL: {{^}}v_fneg_i32_fp_use: ; GCN: s_waitcnt -; GFX11: s_waitcnt_vscnt ; GCN-NEXT: v_sub_f32_e32 v0, 2.0, v0 ; GCN-NEXT: s_setpc_b64 define float @v_fneg_i32_fp_use(i32 %in) { @@ -155,7 +153,6 @@ ; FUNC-LABEL: {{^}}v_fneg_i64: ; GCN: s_waitcnt -; GFX11: s_waitcnt_vscnt ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 ; GCN-NEXT: s_setpc_b64 define i64 @v_fneg_i64(i64 %in) { @@ -175,7 +172,6 @@ ; FUNC-LABEL: {{^}}v_fneg_i64_fp_use: ; GCN: s_waitcnt -; GFX11: s_waitcnt_vscnt ; GCN-NEXT: v_add_f64 v[0:1], -v[0:1], 2.0 ; GCN-NEXT: s_setpc_b64 define double @v_fneg_i64_fp_use(i64 %in) { @@ -187,7 +183,6 @@ ; FUNC-LABEL: {{^}}v_fneg_i16: ; GCN: s_waitcnt -; GFX11: s_waitcnt_vscnt ; GCN-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 ; GCN-NEXT: s_setpc_b64 define i16 @v_fneg_i16(i16 %in) { diff --git a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll --- a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll @@ -5,7 +5,6 @@ ; GFX10-LABEL: fold_abs_in_branch: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_mov_b32 s4, exec_lo ; GFX10-NEXT: v_add_f32_e32 v1, v0, v1 @@ -37,7 +36,6 @@ ; GFX10-LABEL: fold_abs_in_branch_multiple_users: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_mov_b32 s4, exec_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 @@ -73,7 +71,6 @@ ; GFX10-LABEL: fold_abs_in_branch_undef: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e64 v0, |s4|, |s4| ; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v0 ; GFX10-NEXT: s_cbranch_vccnz .LBB2_2 @@ -102,7 +99,6 @@ ; GFX10-LABEL: fold_abs_in_branch_poison: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e64 v0, |s4|, |s4| ; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v0 ; GFX10-NEXT: s_cbranch_vccnz .LBB3_2 @@ -131,7 +127,6 @@ ; GFX10-LABEL: fold_abs_in_branch_fabs: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_mov_b32 s4, exec_lo ; GFX10-NEXT: v_add_f32_e32 v1, v0, v1 @@ -164,7 +159,6 @@ ; GFX10-LABEL: fold_abs_in_branch_phi: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_mov_b32 s4, exec_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 @@ -210,7 +204,6 @@ ; GFX10-LABEL: fold_neg_in_branch: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: s_mov_b32 s4, exec_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll @@ -42,7 +42,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: flat_atomic_fmin_f32_rtn: @@ -51,7 +50,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b32 v[3:4], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %ret = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data) store float %ret, ptr %out @@ -65,7 +63,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: flat_atomic_fmax_f32_rtn: @@ -74,7 +71,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b32 v[3:4], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %ret = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data) store float %ret, ptr %out diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll --- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll @@ -11,7 +11,6 @@ ; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -43,7 +42,6 @@ ; GFX11-LABEL: fadd_fpext_fmul_f16_to_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -72,7 +70,6 @@ ; GFX11-LABEL: fadd_fpext_fmul_f32_to_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 @@ -98,7 +95,6 @@ ; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -131,7 +127,6 @@ ; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1] @@ -168,7 +163,6 @@ ; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1] @@ -203,7 +197,6 @@ ; GFX11-LABEL: fadd_fmad_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1] @@ -241,7 +234,6 @@ ; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1] @@ -276,7 +268,6 @@ ; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1] @@ -313,7 +304,6 @@ ; GFX11-LABEL: fadd_fpext_fmuladd_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2 @@ -349,7 +339,6 @@ ; GFX11-LABEL: fadd_fpext_fma_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2 @@ -385,7 +374,6 @@ ; GFX11-LABEL: fadd_fpext_fma_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2 @@ -423,7 +411,6 @@ ; GFX11-LABEL: fsub_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -456,14 +443,12 @@ ; GFX11-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute: ; GFX11-F32FLUSH: ; %bb.0: ; %entry ; GFX11-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-F32FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-F32FLUSH-NEXT: v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] ; GFX11-F32FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-F32DENORM-LABEL: fsub_fpext_fmul_f16_to_f32_commute: ; GFX11-F32DENORM: ; %bb.0: ; %entry ; GFX11-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-F32DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-F32DENORM-NEXT: v_mul_f16_e32 v1, v1, v2 ; GFX11-F32DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-F32DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -496,7 +481,6 @@ ; GFX11-LABEL: fsub_fpext_fneg_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -530,7 +514,6 @@ ; GFX11-LABEL: fsub_fneg_fpext_fmul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -564,7 +547,6 @@ ; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] @@ -602,7 +584,6 @@ ; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f16_e32 v3, v0, v1 @@ -633,7 +614,6 @@ ; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] @@ -670,7 +650,6 @@ ; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/fpow.ll @@ -42,7 +42,6 @@ ; GFX10-LABEL: v_pow_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -51,7 +50,6 @@ ; GFX11-LABEL: v_pow_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 @@ -110,7 +108,6 @@ ; GFX10-LABEL: v_pow_v2f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_log_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 @@ -122,7 +119,6 @@ ; GFX11-LABEL: v_pow_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_log_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -184,7 +180,6 @@ ; GFX10-LABEL: v_pow_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX10-NEXT: v_log_f32_e32 v0, v0 @@ -196,7 +191,6 @@ ; GFX11-LABEL: v_pow_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -288,7 +282,6 @@ ; GFX10-LABEL: v_pow_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -307,7 +300,6 @@ ; GFX11-LABEL: v_pow_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 @@ -415,7 +407,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -434,7 +425,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 @@ -543,7 +533,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -562,7 +551,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 @@ -675,7 +663,6 @@ ; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -694,7 +681,6 @@ ; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 @@ -765,7 +751,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e64 v0, |v0| ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -774,7 +759,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_lhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e64 v0, |v0| ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 @@ -822,7 +806,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -831,7 +814,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0 @@ -879,7 +861,6 @@ ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e64 v0, |v0| ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 @@ -888,7 +869,6 @@ ; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e64 v0, |v0| ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0 diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -96,14 +96,12 @@ ; GFX11-LABEL: safe_math_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v3, v0 ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX11-NEXT: v_floor_f32_e32 v4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo ; GFX11-NEXT: global_store_b32 v[1:2], v4, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -179,11 +177,9 @@ ; GFX11-LABEL: safe_math_fract_f32_noinf_check: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v3, v0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: global_store_b32 v[1:2], v3, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -259,7 +255,6 @@ ; GFX11-LABEL: no_nan_check_math_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v3, v0 ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -267,7 +262,6 @@ ; GFX11-NEXT: global_store_b32 v[1:2], v3, off ; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -318,7 +312,6 @@ ; GFX11-LABEL: basic_fract_f32_nonans: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -364,7 +357,6 @@ ; GFX11-LABEL: basic_fract_f32_flags_minnum: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -415,7 +407,6 @@ ; GFX11-LABEL: basic_fract_f32_flags_fsub: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -473,7 +464,6 @@ ; GFX11-LABEL: basic_fract_v2f32_nonans: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: v_fract_f32_e32 v1, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -544,13 +534,11 @@ ; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v3, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: global_store_b32 v[1:2], v3, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -598,7 +586,6 @@ ; GFX11-LABEL: nnan_minnum_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -646,7 +633,6 @@ ; GFX11-LABEL: nnan_fsub_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -695,7 +681,6 @@ ; GFX11-LABEL: nnan_floor_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -746,7 +731,6 @@ ; GFX11-LABEL: nnan_src_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -793,7 +777,6 @@ ; GFX11-LABEL: not_fract_f32_wrong_const: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -843,7 +826,6 @@ ; GFX11-LABEL: not_fract_f32_swapped_fsub: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v1, v0 @@ -893,7 +875,6 @@ ; GFX11-LABEL: not_fract_f32_not_floor: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_trunc_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -943,7 +924,6 @@ ; GFX11-LABEL: not_fract_f32_different_floor: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -993,7 +973,6 @@ ; GFX11-LABEL: not_fract_f32_maxnum: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1017,26 +996,10 @@ ; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] ; IR-NEXT: ret float [[COND]] ; -; GFX6-LABEL: fcmp_uno_check_is_nan_f32: -; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-LABEL: fcmp_uno_check_is_nan_f32: -; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: fcmp_uno_check_is_nan_f32: -; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: fcmp_uno_check_is_nan_f32: -; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: fcmp_uno_check_is_nan_f32: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) %sub = fsub float %x, %floor @@ -1089,7 +1052,6 @@ ; GFX11-LABEL: select_nan_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1143,7 +1105,6 @@ ; GFX11-LABEL: commuted_select_nan_fract_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1199,7 +1160,6 @@ ; GFX11-LABEL: wrong_commuted_nan_select_f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f32_e32 v1, v0 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1269,7 +1229,6 @@ ; GFX11-LABEL: basic_fract_f16_nonan: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f16_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1348,7 +1307,6 @@ ; GFX11-LABEL: basic_fract_v2f16_nonan: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_fract_f16_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1409,7 +1367,6 @@ ; GFX11-LABEL: basic_fract_f64_nanans: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1500,11 +1457,9 @@ ; GFX11-LABEL: safe_math_fract_f16_noinf_check: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f16_e32 v3, v0 ; GFX11-NEXT: v_fract_f16_e32 v0, v0 ; GFX11-NEXT: global_store_b16 v[1:2], v3, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call half @llvm.floor.f16(half %x) @@ -1587,11 +1542,9 @@ ; GFX11-LABEL: safe_math_fract_f64_noinf_check: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1] ; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1] ; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call double @llvm.floor.f64(double %x) @@ -1645,7 +1598,6 @@ ; GFX11-LABEL: select_nan_fract_f32_flags_select: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1699,7 +1651,6 @@ ; GFX11-LABEL: select_nan_fract_f32_flags_minnum: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1807,7 +1758,6 @@ ; GFX11-LABEL: safe_math_fract_v2f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f32_e32 v6, v0 ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX11-NEXT: v_fract_f32_e32 v7, v1 @@ -1817,7 +1767,6 @@ ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1| ; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x) @@ -1923,7 +1872,6 @@ ; GFX11-LABEL: safe_math_fract_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000 ; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1] @@ -1932,7 +1880,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5 ; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call double @llvm.floor.f64(double %x) @@ -2046,14 +1993,12 @@ ; GFX11-LABEL: safe_math_fract_f16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f16_e32 v3, v0 ; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0| ; GFX11-NEXT: v_floor_f16_e32 v4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo ; GFX11-NEXT: global_store_b16 v[1:2], v4, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call half @llvm.floor.f16(half %x) @@ -2207,7 +2152,6 @@ ; GFX11-LABEL: safe_math_fract_v2f16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-NEXT: v_fract_f16_e32 v6, v0 ; GFX11-NEXT: v_floor_f16_e32 v5, v0 @@ -2222,7 +2166,6 @@ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 ; GFX11-NEXT: global_store_b32 v[1:2], v4, off ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x) @@ -2355,7 +2298,6 @@ ; GFX11-LABEL: safe_math_fract_v2f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1] ; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 ; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3] @@ -2367,7 +2309,6 @@ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1 ; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x) @@ -2404,5 +2345,3 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -609,14 +609,12 @@ ; GFX10-LABEL: v_fshr_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) @@ -639,7 +637,6 @@ ; GFX10-LABEL: v_fshr_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -647,7 +644,6 @@ ; GFX11-LABEL: v_fshr_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -672,7 +668,6 @@ ; GFX10-LABEL: v_fshr_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 @@ -681,7 +676,6 @@ ; GFX11-LABEL: v_fshr_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8 @@ -708,7 +702,6 @@ ; GFX10-LABEL: v_fshr_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 @@ -718,7 +711,6 @@ ; GFX11-LABEL: v_fshr_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10 @@ -765,7 +757,6 @@ ; GFX10-LABEL: v_fshr_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 @@ -776,7 +767,6 @@ ; GFX11-LABEL: v_fshr_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 @@ -842,7 +832,6 @@ ; GFX10-LABEL: v_fshr_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 @@ -855,7 +844,6 @@ ; GFX11-LABEL: v_fshr_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 @@ -945,7 +933,6 @@ ; GFX10-LABEL: v_fshr_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v2 @@ -970,7 +957,6 @@ ; GFX11-LABEL: v_fshr_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2 @@ -1093,7 +1079,6 @@ ; GFX10-LABEL: v_fshr_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 @@ -1127,7 +1112,6 @@ ; GFX11-LABEL: v_fshr_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v5 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1 @@ -1205,7 +1189,6 @@ ; GFX10-LABEL: v_fshr_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX10-NEXT: v_not_b32_e32 v5, v4 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] @@ -1217,7 +1200,6 @@ ; GFX11-LABEL: v_fshr_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX11-NEXT: v_not_b32_e32 v5, v4 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] @@ -1291,7 +1273,6 @@ ; GFX10-LABEL: v_fshr_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_not_b32_e32 v9, v8 @@ -1309,7 +1290,6 @@ ; GFX11-LABEL: v_fshr_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX11-NEXT: v_not_b32_e32 v9, v8 @@ -1378,7 +1358,6 @@ ; GFX10-LABEL: v_fshr_i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaab, v3 @@ -1391,7 +1370,6 @@ ; GFX11-LABEL: v_fshr_i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1476,7 +1454,6 @@ ; GFX10-LABEL: v_fshr_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v7, 0xffffff, v5 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 @@ -1496,7 +1473,6 @@ ; GFX11-LABEL: v_fshr_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v4 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v5 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -128,7 +128,6 @@ ; GFX10-LABEL: test_call_external_void_func_i1_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -163,7 +162,6 @@ ; GFX11-LABEL: test_call_external_void_func_i1_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -197,7 +195,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -269,7 +266,6 @@ ; GFX10-LABEL: test_call_external_void_func_i1_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -306,7 +302,6 @@ ; GFX11-LABEL: test_call_external_void_func_i1_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -342,7 +337,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -417,7 +411,6 @@ ; GFX10-LABEL: test_call_external_void_func_i1_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -454,7 +447,6 @@ ; GFX11-LABEL: test_call_external_void_func_i1_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -490,7 +482,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -562,7 +553,6 @@ ; GFX10-LABEL: test_call_external_void_func_i8_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -596,7 +586,6 @@ ; GFX11-LABEL: test_call_external_void_func_i8_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -630,7 +619,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -699,7 +687,6 @@ ; GFX10-LABEL: test_call_external_void_func_i8_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -734,7 +721,6 @@ ; GFX11-LABEL: test_call_external_void_func_i8_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -769,7 +755,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -840,7 +825,6 @@ ; GFX10-LABEL: test_call_external_void_func_i8_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -875,7 +859,6 @@ ; GFX11-LABEL: test_call_external_void_func_i8_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -910,7 +893,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -980,7 +962,6 @@ ; GFX10-LABEL: test_call_external_void_func_i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1014,7 +995,6 @@ ; GFX11-LABEL: test_call_external_void_func_i16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1048,7 +1028,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1117,7 +1096,6 @@ ; GFX10-LABEL: test_call_external_void_func_i16_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1152,7 +1130,6 @@ ; GFX11-LABEL: test_call_external_void_func_i16_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1187,7 +1164,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1258,7 +1234,6 @@ ; GFX10-LABEL: test_call_external_void_func_i16_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1293,7 +1268,6 @@ ; GFX11-LABEL: test_call_external_void_func_i16_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1328,7 +1302,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1398,7 +1371,6 @@ ; GFX10-LABEL: test_call_external_void_func_i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1432,7 +1404,6 @@ ; GFX11-LABEL: test_call_external_void_func_i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1466,7 +1437,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1535,7 +1505,6 @@ ; GFX10-LABEL: test_call_external_void_func_i64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1570,7 +1539,6 @@ ; GFX11-LABEL: test_call_external_void_func_i64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1604,7 +1572,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1675,7 +1642,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1711,7 +1677,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1747,7 +1712,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1821,7 +1785,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1858,7 +1821,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1893,7 +1855,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1968,7 +1929,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2006,7 +1966,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2042,7 +2001,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2123,7 +2081,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2163,7 +2120,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2200,7 +2156,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2276,7 +2231,6 @@ ; GFX10-LABEL: test_call_external_void_func_f16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2310,7 +2264,6 @@ ; GFX11-LABEL: test_call_external_void_func_f16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2344,7 +2297,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2412,7 +2364,6 @@ ; GFX10-LABEL: test_call_external_void_func_f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2446,7 +2397,6 @@ ; GFX11-LABEL: test_call_external_void_func_f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2480,7 +2430,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2549,7 +2498,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2584,7 +2532,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2618,7 +2565,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2689,7 +2635,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2725,7 +2670,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2760,7 +2704,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2834,7 +2777,6 @@ ; GFX10-LABEL: test_call_external_void_func_v5f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2872,7 +2814,6 @@ ; GFX11-LABEL: test_call_external_void_func_v5f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2908,7 +2849,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2981,7 +2921,6 @@ ; GFX10-LABEL: test_call_external_void_func_f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3016,7 +2955,6 @@ ; GFX11-LABEL: test_call_external_void_func_f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3050,7 +2988,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3122,7 +3059,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3159,7 +3095,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3194,7 +3129,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3270,7 +3204,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3309,7 +3242,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3345,7 +3277,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3418,7 +3349,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3452,7 +3382,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3486,7 +3415,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3555,7 +3483,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3589,7 +3516,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3623,7 +3549,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3692,7 +3617,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3726,7 +3650,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3760,7 +3683,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3830,7 +3752,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3865,7 +3786,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3899,7 +3819,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3969,7 +3888,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4004,7 +3922,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4039,7 +3956,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4108,7 +4024,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4142,7 +4057,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4176,7 +4090,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4246,7 +4159,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4281,7 +4193,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4316,7 +4227,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4385,7 +4295,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4419,7 +4328,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4453,7 +4361,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4522,7 +4429,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4556,7 +4462,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4590,7 +4495,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4660,7 +4564,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4695,7 +4598,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4729,7 +4631,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4800,7 +4701,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4836,7 +4736,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4871,7 +4770,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4944,7 +4842,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i32_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4981,7 +4878,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5016,7 +4912,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5087,7 +4982,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5121,7 +5015,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5155,7 +5048,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5227,7 +5119,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5264,7 +5155,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5299,7 +5189,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5374,7 +5263,6 @@ ; GFX10-LABEL: test_call_external_void_func_v5i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5412,7 +5300,6 @@ ; GFX11-LABEL: test_call_external_void_func_v5i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5448,7 +5335,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5524,7 +5410,6 @@ ; GFX10-LABEL: test_call_external_void_func_v8i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5563,7 +5448,6 @@ ; GFX11-LABEL: test_call_external_void_func_v8i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5602,7 +5486,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5684,7 +5567,6 @@ ; GFX10-LABEL: test_call_external_void_func_v8i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5725,7 +5607,6 @@ ; GFX11-LABEL: test_call_external_void_func_v8i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5762,7 +5643,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -5843,7 +5723,6 @@ ; GFX10-LABEL: test_call_external_void_func_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5884,7 +5763,6 @@ ; GFX11-LABEL: test_call_external_void_func_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -5925,7 +5803,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6013,7 +5890,6 @@ ; GFX10-LABEL: test_call_external_void_func_v32i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6058,7 +5934,6 @@ ; GFX11-LABEL: test_call_external_void_func_v32i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6103,7 +5978,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6198,7 +6072,6 @@ ; GFX10-LABEL: test_call_external_void_func_v32i32_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6246,7 +6119,6 @@ ; GFX11-LABEL: test_call_external_void_func_v32i32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6293,7 +6165,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6386,7 +6257,6 @@ ; GFX10-LABEL: test_call_external_i32_func_i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6429,7 +6299,6 @@ ; GFX11-LABEL: test_call_external_i32_func_i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6472,7 +6341,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6554,7 +6422,6 @@ ; GFX10-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6593,7 +6460,6 @@ ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6632,7 +6498,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6711,7 +6576,6 @@ ; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6749,7 +6613,6 @@ ; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6787,7 +6650,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6877,7 +6739,6 @@ ; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6925,7 +6786,6 @@ ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6971,7 +6831,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7088,7 +6947,6 @@ ; GFX10-LABEL: test_call_external_void_func_v16i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7145,7 +7003,6 @@ ; GFX11-LABEL: test_call_external_void_func_v16i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7198,7 +7055,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7353,7 +7209,6 @@ ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 @@ -7449,7 +7304,6 @@ ; GFX11-LABEL: tail_call_byval_align16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s4, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 @@ -7540,7 +7394,6 @@ ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -7671,7 +7524,6 @@ ; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7706,7 +7558,6 @@ ; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7740,7 +7591,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7811,7 +7661,6 @@ ; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7847,7 +7696,6 @@ ; GFX11-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7883,7 +7731,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7955,7 +7802,6 @@ ; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7991,7 +7837,6 @@ ; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8027,7 +7872,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8099,7 +7943,6 @@ ; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8135,7 +7978,6 @@ ; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8171,7 +8013,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8246,7 +8087,6 @@ ; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8285,7 +8125,6 @@ ; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8324,7 +8163,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8406,7 +8244,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i64_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8449,7 +8286,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8492,7 +8328,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8581,7 +8416,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8626,7 +8460,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8671,7 +8504,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8765,7 +8597,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i64_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8814,7 +8645,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8863,7 +8693,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8970,7 +8799,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i64_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9025,7 +8853,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i64_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9080,7 +8907,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9173,7 +8999,6 @@ ; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9209,7 +9034,6 @@ ; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9245,7 +9069,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9317,7 +9140,6 @@ ; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9353,7 +9175,6 @@ ; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9389,7 +9210,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9464,7 +9284,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9503,7 +9322,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9542,7 +9360,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9623,7 +9440,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9665,7 +9481,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9707,7 +9522,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9797,7 +9611,6 @@ ; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9845,7 +9658,6 @@ ; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9893,7 +9705,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9980,7 +9791,6 @@ ; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10019,7 +9829,6 @@ ; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10058,7 +9867,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10142,7 +9950,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10187,7 +9994,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10232,7 +10038,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10328,7 +10133,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10379,7 +10183,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10430,7 +10233,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10517,7 +10319,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10553,7 +10354,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10589,7 +10389,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10664,7 +10463,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10702,7 +10500,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10740,7 +10537,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10817,7 +10613,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10855,7 +10650,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -10893,7 +10687,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10971,7 +10764,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11010,7 +10802,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11049,7 +10840,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11127,7 +10917,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11166,7 +10955,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11205,7 +10993,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11282,7 +11069,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11320,7 +11106,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11358,7 +11143,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11436,7 +11220,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11475,7 +11258,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11514,7 +11296,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11589,7 +11370,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11625,7 +11405,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11661,7 +11440,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11736,7 +11514,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11774,7 +11551,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11812,7 +11588,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -11890,7 +11665,6 @@ ; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -11929,7 +11703,6 @@ ; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -11968,7 +11741,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12049,7 +11821,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -12091,7 +11862,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -12133,7 +11903,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12220,7 +11989,6 @@ ; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -12265,7 +12033,6 @@ ; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -12310,7 +12077,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12397,7 +12163,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -12439,7 +12204,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -12481,7 +12245,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12569,7 +12332,6 @@ ; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -12614,7 +12376,6 @@ ; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -12659,7 +12420,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12752,7 +12512,6 @@ ; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -12800,7 +12559,6 @@ ; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -12848,7 +12606,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -12948,7 +12705,6 @@ ; GFX10-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -13000,7 +12756,6 @@ ; GFX11-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -13052,7 +12807,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -13163,7 +12917,6 @@ ; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -13220,7 +12973,6 @@ ; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -13277,7 +13029,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -13402,7 +13153,6 @@ ; GFX10-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -13470,7 +13220,6 @@ ; GFX11-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -13538,7 +13287,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -13720,7 +13468,6 @@ ; GFX10-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -13833,7 +13580,6 @@ ; GFX11-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -13941,7 +13687,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -14170,7 +13915,6 @@ ; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -14288,7 +14032,6 @@ ; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -14399,7 +14142,6 @@ ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -14557,7 +14299,6 @@ ; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -14597,7 +14338,6 @@ ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -14632,7 +14372,6 @@ ; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -14742,7 +14481,6 @@ ; GFX10-LABEL: stack_12xv3i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -14815,7 +14553,6 @@ ; GFX11-LABEL: stack_12xv3i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -14867,7 +14604,6 @@ ; GFX10-SCRATCH-LABEL: stack_12xv3i32: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -15031,7 +14767,6 @@ ; GFX10-LABEL: stack_8xv5i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -15112,7 +14847,6 @@ ; GFX11-LABEL: stack_8xv5i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -15168,7 +14902,6 @@ ; GFX10-SCRATCH-LABEL: stack_8xv5i32: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -15334,7 +15067,6 @@ ; GFX10-LABEL: stack_8xv5f32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -15415,7 +15147,6 @@ ; GFX11-LABEL: stack_8xv5f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -15477,7 +15208,6 @@ ; GFX10-SCRATCH-LABEL: stack_8xv5f32: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -45,7 +45,6 @@ ; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -85,7 +84,6 @@ ; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -157,7 +155,6 @@ ; GFX10-LABEL: void_func_void_clobber_s28_s29: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -181,13 +178,11 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_void_clobber_s28_s29: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -210,7 +205,6 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 @@ -260,7 +254,6 @@ ; GFX10-LABEL: test_call_void_func_void_mayclobber_s31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -303,7 +296,6 @@ ; GFX11-LABEL: test_call_void_func_void_mayclobber_s31: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -390,7 +382,6 @@ ; GFX10-LABEL: test_call_void_func_void_mayclobber_v31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -433,7 +424,6 @@ ; GFX11-LABEL: test_call_void_func_void_mayclobber_v31: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -522,7 +512,6 @@ ; GFX10-LABEL: test_call_void_func_void_preserves_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -565,7 +554,6 @@ ; GFX11-LABEL: test_call_void_func_void_preserves_s33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -653,7 +641,6 @@ ; GFX10-LABEL: test_call_void_func_void_preserves_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -696,7 +683,6 @@ ; GFX11-LABEL: test_call_void_func_void_preserves_s34: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -782,7 +768,6 @@ ; GFX10-LABEL: test_call_void_func_void_preserves_v40: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -823,7 +808,6 @@ ; GFX11-LABEL: test_call_void_func_void_preserves_v40: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -887,7 +871,6 @@ ; GFX10-LABEL: void_func_void_clobber_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -902,13 +885,11 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_void_clobber_s33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -922,7 +903,6 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber", "~{s33}"() #0 ret void @@ -949,7 +929,6 @@ ; GFX10-LABEL: void_func_void_clobber_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -964,13 +943,11 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_void_clobber_s34: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -984,7 +961,6 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber", "~{s34}"() #0 ret void @@ -1023,7 +999,6 @@ ; GFX10-LABEL: test_call_void_func_void_clobber_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1056,7 +1031,6 @@ ; GFX11-LABEL: test_call_void_func_void_clobber_s33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1122,7 +1096,6 @@ ; GFX10-LABEL: test_call_void_func_void_clobber_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1155,7 +1128,6 @@ ; GFX11-LABEL: test_call_void_func_void_clobber_s34: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1230,7 +1202,6 @@ ; GFX10-LABEL: callee_saved_sgpr_kernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1272,7 +1243,6 @@ ; GFX11-LABEL: callee_saved_sgpr_kernel: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1367,7 +1337,6 @@ ; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1418,7 +1387,6 @@ ; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -13,7 +13,6 @@ ; GFX10PLUS-LABEL: return_i1: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: @@ -51,7 +50,6 @@ ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -81,7 +79,6 @@ ; GFX11-LABEL: call_i1: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -121,7 +118,6 @@ ; GFX10PLUS-LABEL: return_i16: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 10 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: @@ -159,7 +155,6 @@ ; GFX10-LABEL: call_i16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -189,7 +184,6 @@ ; GFX11-LABEL: call_i16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -229,7 +223,6 @@ ; GFX10PLUS-LABEL: return_2xi16: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: @@ -267,7 +260,6 @@ ; GFX10-LABEL: call_2xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -297,7 +289,6 @@ ; GFX11-LABEL: call_2xi16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -338,7 +329,6 @@ ; GFX10-LABEL: return_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -346,7 +336,6 @@ ; GFX11-LABEL: return_3xi16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -384,7 +373,6 @@ ; GFX10-LABEL: call_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -414,7 +402,6 @@ ; GFX11-LABEL: call_3xi16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -555,7 +542,6 @@ ; GFX10-LABEL: return_100xi32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 @@ -661,7 +647,6 @@ ; GFX11-LABEL: return_100xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0 @@ -811,7 +796,6 @@ ; GFX10-LABEL: call_100xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -906,7 +890,6 @@ ; GFX11-LABEL: call_100xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -1526,7 +1509,6 @@ ; GFX10-LABEL: return_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020 ; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044 @@ -2040,13 +2022,11 @@ ; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:8 ; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 ; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: return_512xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s3, s0 @@ -2311,7 +2291,6 @@ ; GFX11-NEXT: s_add_i32 s0, s0, 16 ; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 ; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: ret <512 x i32> zeroinitializer @@ -2350,7 +2329,6 @@ ; GFX10-LABEL: call_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 @@ -2382,7 +2360,6 @@ ; GFX11-LABEL: call_512xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x7ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2581,7 +2558,6 @@ ; GFX10-LABEL: return_72xi32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill @@ -2716,13 +2692,11 @@ ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:176 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:180 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: return_72xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0xe ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:220 ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:216 @@ -2849,7 +2823,6 @@ ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:216 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:220 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ret <72 x i32> %val } @@ -3140,7 +3113,6 @@ ; GFX10-LABEL: call_72xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0x3fe0 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffffc000 @@ -3420,7 +3392,6 @@ ; GFX11-LABEL: call_72xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s45, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll --- a/llvm/test/CodeGen/AMDGPU/imm16.ll +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -1804,19 +1804,15 @@ ; GFX10-LABEL: mul_inline_imm_0.5_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_0.5_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_0.5_i16: @@ -1848,19 +1844,15 @@ ; GFX10-LABEL: mul_inline_imm_neg_0.5_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_neg_0.5_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_neg_0.5_i16: @@ -1892,19 +1884,15 @@ ; GFX10-LABEL: mul_inline_imm_1.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_1.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_1.0_i16: @@ -1936,19 +1924,15 @@ ; GFX10-LABEL: mul_inline_imm_neg_1.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_neg_1.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_neg_1.0_i16: @@ -1980,19 +1964,15 @@ ; GFX10-LABEL: shl_inline_imm_2.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: shl_inline_imm_2.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: shl_inline_imm_2.0_i16: @@ -2024,19 +2004,15 @@ ; GFX10-LABEL: shl_inline_imm_neg_2.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: shl_inline_imm_neg_2.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: shl_inline_imm_neg_2.0_i16: @@ -2068,19 +2044,15 @@ ; GFX10-LABEL: mul_inline_imm_4.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_4.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_4.0_i16: @@ -2112,19 +2084,15 @@ ; GFX10-LABEL: mul_inline_imm_neg_4.0_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_neg_4.0_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_neg_4.0_i16: @@ -2156,19 +2124,15 @@ ; GFX10-LABEL: mul_inline_imm_inv2pi_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] ; ; GFX11-LABEL: mul_inline_imm_inv2pi_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] ; ; VI-LABEL: mul_inline_imm_inv2pi_i16: diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -7,7 +7,6 @@ ; GFX11-LABEL: f0: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -41,7 +40,6 @@ ; GFX11-LABEL: f1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll --- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll @@ -125,7 +125,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i32: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 @@ -137,7 +136,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i32: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1 @@ -255,7 +253,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 @@ -266,7 +263,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 @@ -456,7 +452,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0 @@ -470,7 +465,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2 @@ -710,7 +704,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3 @@ -732,7 +725,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2 @@ -1071,7 +1063,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3 @@ -1093,7 +1084,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2 @@ -1215,7 +1205,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 @@ -1226,7 +1215,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 @@ -1416,7 +1404,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0 @@ -1430,7 +1417,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2 @@ -1670,7 +1656,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3 @@ -1692,7 +1677,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_v3i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2 @@ -2031,7 +2015,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3 @@ -2053,7 +2036,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_v4i16: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2 @@ -2254,7 +2236,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 @@ -2272,7 +2253,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v1 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 @@ -2533,7 +2513,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i32: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 @@ -2557,7 +2536,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i32: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v1 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v2 @@ -2852,7 +2830,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i32: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 @@ -2882,7 +2859,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i32: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v9, 1, v1 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v10, 1, v2 @@ -3050,7 +3026,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i24: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 @@ -3064,7 +3039,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i24: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 @@ -3214,7 +3188,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_u24: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 @@ -3228,7 +3201,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_u24: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 @@ -3350,7 +3322,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 @@ -3361,7 +3332,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i8: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 @@ -3548,7 +3518,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i8: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u16 v1, v1, 1 ; GFX10-SDAG-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1 @@ -3569,7 +3538,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i8: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v4, v2 @@ -3904,7 +3872,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i64: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3 @@ -3930,7 +3897,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i64: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v2, 0 @@ -4534,7 +4500,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i64: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1 @@ -4580,7 +4545,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i64: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v0, 1 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v1, vcc_lo ; GFX10-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v2, 1 @@ -4853,7 +4817,6 @@ ; GFX10-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: ; GFX10-SDAG: ; %bb.0: ; %bb ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 @@ -4872,7 +4835,6 @@ ; GFX10-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: ; GFX10-GISEL: ; %bb.0: ; %bb ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1 @@ -5086,7 +5048,6 @@ ; GFX10-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: ; GFX10-SDAG: ; %bb.0: ; %bb ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 @@ -5103,7 +5064,6 @@ ; GFX10-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: ; GFX10-GISEL: ; %bb.0: ; %bb ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1 @@ -5299,7 +5259,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i32_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 @@ -5319,7 +5278,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i32_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1 @@ -5654,7 +5612,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 @@ -5688,7 +5645,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v1 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 @@ -5894,7 +5850,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_i16_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 @@ -5909,7 +5864,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_i16_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 @@ -6094,7 +6048,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_i16_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 @@ -6109,7 +6062,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_i16_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 @@ -6423,7 +6375,6 @@ ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0 @@ -6445,7 +6396,6 @@ ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2 @@ -6759,7 +6709,6 @@ ; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16_x2: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0 @@ -6781,7 +6730,6 @@ ; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16_x2: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0] ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2 @@ -6849,7 +6797,6 @@ ; GFX10-LABEL: multi_use_mul_mad_i32_var: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 @@ -6921,7 +6868,6 @@ ; GFX10-SDAG-LABEL: multi_use_mul_mad_i16_var: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3 ; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100 @@ -6930,7 +6876,6 @@ ; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3 @@ -6979,7 +6924,6 @@ ; GFX10-LABEL: other_use_mul_mad_i32_var: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2 ; GFX10-NEXT: ds_write_b32 v3, v1 @@ -7039,7 +6983,6 @@ ; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var: ; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX10-SDAG-NEXT: ds_write_b16 v3, v4 @@ -7049,7 +6992,6 @@ ; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2 ; GFX10-GISEL-NEXT: ds_write_b16 v3, v1 @@ -7146,7 +7088,6 @@ ; GFX10-LABEL: multi_use_mul_mad_v2i16_var: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_pk_add_u16 v0, v1, v2 ; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 @@ -7251,7 +7192,6 @@ ; GFX10-LABEL: other_use_mul_mad_v2i16_var: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_pk_add_u16 v0, v1, v2 ; GFX10-NEXT: ds_write_b32 v3, v1 diff --git a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll --- a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll +++ b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: known_nnan_extract_vector_elt: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 4, v2 ; CHECK-NEXT: v_add_f16_e32 v2, 1.0, v3 @@ -27,7 +26,6 @@ ; CHECK-LABEL: fma_not_fmaxnm_maybe_nan: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v1 ; CHECK-NEXT: v_fmaak_f32 v0, v1, v0, 0xff800000 diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll --- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll @@ -17,7 +17,6 @@ ; CHECK-LABEL: use_module: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: ds_write_b16 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -40,7 +39,6 @@ ; CHECK-LABEL: use_extern_normal: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12 @@ -65,7 +63,6 @@ ; CHECK-LABEL: use_extern_overalign: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll @@ -8,14 +8,12 @@ ; GFX10-LABEL: v_fma: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c) @@ -26,14 +24,12 @@ ; GFX10-LABEL: v_fma_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, 0x41200000, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c) @@ -44,14 +40,12 @@ ; GFX10-LABEL: v_fabs_fma: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, |v0|, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fabs_fma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.a = call float @llvm.fabs.f32(float %a) @@ -63,14 +57,12 @@ ; GFX10-LABEL: v_fneg_fabs_fma: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, v0, -|v1|, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fneg_fabs_fma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.b = call float @llvm.fabs.f32(float %b) @@ -83,14 +75,12 @@ ; GFX10-LABEL: v_fneg_fma: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fneg_fma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg float %c @@ -102,14 +92,12 @@ ; GFX10-LABEL: v_fma_const_const: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_legacy_f32 v0, v0, 2.0, -1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fma_const_const: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float 2.0, float -1.0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll @@ -119,7 +119,6 @@ ; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v6, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_tfe: @@ -135,7 +134,6 @@ ; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) @@ -223,7 +221,6 @@ ; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v6, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_lwe: @@ -239,7 +236,6 @@ ; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0) @@ -366,7 +362,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2d_tfe: @@ -382,7 +377,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -513,7 +507,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_3d_tfe_lwe: @@ -530,7 +523,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0) @@ -661,7 +653,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_cube_lwe: @@ -678,7 +669,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) @@ -805,7 +795,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1darray_tfe: @@ -821,7 +810,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0) @@ -952,7 +940,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darray_lwe: @@ -969,7 +956,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) @@ -1100,7 +1086,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2dmsaa_both: @@ -1117,7 +1102,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0) @@ -1252,7 +1236,6 @@ ; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v9, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_2darraymsaa_tfe: @@ -1269,7 +1252,6 @@ ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) @@ -1396,7 +1378,6 @@ ; GFX10-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v7, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_mip_1d_lwe: @@ -1412,7 +1393,6 @@ ; GFX11-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0) @@ -1543,7 +1523,6 @@ ; GFX10-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v8, v4, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_mip_2d_tfe: @@ -1560,7 +1539,6 @@ ; GFX11-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) @@ -1948,7 +1926,6 @@ ; GFX10-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v5, v3, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_tfe_V4_dmask3: @@ -1962,7 +1939,6 @@ ; GFX11-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v5, v3, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) @@ -2038,7 +2014,6 @@ ; GFX10-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v4, v2, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_tfe_V4_dmask2: @@ -2051,7 +2026,6 @@ ; GFX11-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v4, v2, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) @@ -2121,7 +2095,6 @@ ; GFX10-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v3, v1, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_tfe_V4_dmask1: @@ -2132,7 +2105,6 @@ ; GFX11-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v3, v1, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) @@ -2202,7 +2174,6 @@ ; GFX10-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v3, v1, s[8:9] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: load_1d_tfe_V2_dmask1: @@ -2213,7 +2184,6 @@ ; GFX11-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v3, v1, s[8:9] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -108,7 +108,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-NEXT: global_store_dword v4, v3, s[12:13] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: image_sample_2d_f16_tfe: @@ -123,7 +122,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: global_store_b32 v4, v3, s[12:13] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -97,7 +97,6 @@ ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v6, v4, s[12:13] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: sample_1d_tfe: @@ -116,7 +115,6 @@ ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -560,7 +558,6 @@ ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v6, v4, s[12:13] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: sample_1d_lwe: @@ -579,7 +576,6 @@ ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0) @@ -1625,7 +1621,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v9 ; GFX10-NEXT: global_store_dword v11, v10, s[12:13] -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe: @@ -1637,7 +1632,6 @@ ; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v11, v1, s[12:13] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll @@ -537,7 +537,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off ; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x i32>, i32 } %load, 0 @@ -579,7 +578,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off ; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x float>, i32 } %load, 0 @@ -622,7 +620,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off ; GFX11-NEXT: v_mov_b32_e32 v0, v5 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x i32>, i32 } %load, 0 @@ -665,7 +662,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off ; GFX11-NEXT: v_mov_b32_e32 v0, v5 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x float>, i32 } %load, 0 @@ -707,7 +703,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: v_mov_b32_e32 v0, v4 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x i32>, i32 } %load, 0 @@ -749,7 +744,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: v_mov_b32_e32 v0, v4 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x float>, i32 } %load, 0 @@ -791,7 +785,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: v_mov_b32_e32 v0, v3 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { i32, i32 } %load, 0 @@ -833,7 +826,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: v_mov_b32_e32 v0, v3 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { float, i32 } %load, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll @@ -537,7 +537,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off ; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x i32>, i32 } %load, 0 @@ -579,7 +578,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off ; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x float>, i32 } %load, 0 @@ -622,7 +620,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off ; GFX11-NEXT: v_mov_b32_e32 v0, v5 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x i32>, i32 } %load, 0 @@ -665,7 +662,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off ; GFX11-NEXT: v_mov_b32_e32 v0, v5 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x float>, i32 } %load, 0 @@ -707,7 +703,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: v_mov_b32_e32 v0, v4 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x i32>, i32 } %load, 0 @@ -749,7 +744,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: v_mov_b32_e32 v0, v4 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x float>, i32 } %load, 0 @@ -791,7 +785,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: v_mov_b32_e32 v0, v3 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { i32, i32 } %load, 0 @@ -833,7 +826,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: v_mov_b32_e32 v0, v3 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { float, i32 } %load, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll @@ -7,7 +7,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -19,7 +18,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_us: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -31,7 +29,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_su: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -43,7 +40,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -57,7 +53,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -69,7 +64,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_us_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -81,7 +75,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_su_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -93,7 +86,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll @@ -7,7 +7,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -19,7 +18,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_us: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -31,7 +29,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_su: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -43,7 +40,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -57,7 +53,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -69,7 +64,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_us_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x5c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -81,7 +75,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_su_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x3c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: @@ -93,7 +86,6 @@ ; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss_clamp: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] ; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x7c] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] entry: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -116,14 +116,12 @@ ; GFX10CHECK-LABEL: zeromask_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] ; ; GFX11CHECK-LABEL: zeromask_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 0) @@ -171,28 +169,24 @@ ; GFX10SELDAG-LABEL: allflags_f16: ; GFX10SELDAG: ; %bb.0: ; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 1 ; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10GLISEL-LABEL: allflags_f16: ; GFX10GLISEL: ; %bb.0: ; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, -1 ; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-LABEL: allflags_f16: ; GFX11SELDAG: ; %bb.0: ; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 1 ; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-LABEL: allflags_f16: ; GFX11GLISEL: ; %bb.0: ; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, -1 ; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31] %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff @@ -243,7 +237,6 @@ ; GFX10CHECK-LABEL: snan_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 1 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -251,7 +244,6 @@ ; GFX11CHECK-LABEL: snan_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 1 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -297,7 +289,6 @@ ; GFX10CHECK-LABEL: qnan_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 2 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -305,7 +296,6 @@ ; GFX11CHECK-LABEL: qnan_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 2 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -351,7 +341,6 @@ ; GFX10CHECK-LABEL: posinf_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x200 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -359,7 +348,6 @@ ; GFX11CHECK-LABEL: posinf_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x200 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -403,7 +391,6 @@ ; GFX10CHECK-LABEL: neginf_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 4 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -411,7 +398,6 @@ ; GFX11CHECK-LABEL: neginf_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 4 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -469,7 +455,6 @@ ; GFX10CHECK-LABEL: posnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x100 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -477,7 +462,6 @@ ; GFX11CHECK-LABEL: posnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x100 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -533,7 +517,6 @@ ; GFX10CHECK-LABEL: negnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 8 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -541,7 +524,6 @@ ; GFX11CHECK-LABEL: negnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 8 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -590,7 +572,6 @@ ; GFX10CHECK-LABEL: possubnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x80 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -598,7 +579,6 @@ ; GFX11CHECK-LABEL: possubnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x80 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -653,7 +633,6 @@ ; GFX10CHECK-LABEL: negsubnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 16 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -661,7 +640,6 @@ ; GFX11CHECK-LABEL: negsubnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 16 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -703,7 +681,6 @@ ; GFX10CHECK-LABEL: poszero_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 64 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -711,7 +688,6 @@ ; GFX11CHECK-LABEL: poszero_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 64 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -755,7 +731,6 @@ ; GFX10CHECK-LABEL: negzero_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 32 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -763,7 +738,6 @@ ; GFX11CHECK-LABEL: negzero_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 32 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -809,7 +783,6 @@ ; GFX10CHECK-LABEL: posfinite_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1c0 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -817,7 +790,6 @@ ; GFX11CHECK-LABEL: posfinite_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1c0 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -869,7 +841,6 @@ ; GFX10CHECK-LABEL: negfinite_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 56 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -877,7 +848,6 @@ ; GFX11CHECK-LABEL: negfinite_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 56 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -923,7 +893,6 @@ ; GFX10CHECK-LABEL: isnan_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -931,7 +900,6 @@ ; GFX11CHECK-LABEL: isnan_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -981,7 +949,6 @@ ; GFX10CHECK-LABEL: not_isnan_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3fc ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -989,7 +956,6 @@ ; GFX11CHECK-LABEL: not_isnan_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3fc ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1050,7 +1016,6 @@ ; GFX10CHECK-LABEL: isnan_v2f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 @@ -1062,7 +1027,6 @@ ; GFX11CHECK-LABEL: isnan_v2f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1165,7 +1129,6 @@ ; GFX10SELDAG-LABEL: isnan_v3f16: ; GFX10SELDAG: ; %bb.0: ; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0 ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 @@ -1178,7 +1141,6 @@ ; GFX10GLISEL-LABEL: isnan_v3f16: ; GFX10GLISEL: ; %bb.0: ; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10GLISEL-NEXT: v_mov_b32_e32 v2, 3 ; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 ; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 @@ -1193,7 +1155,6 @@ ; GFX11SELDAG-LABEL: isnan_v3f16: ; GFX11SELDAG: ; %bb.0: ; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0 ; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo @@ -1207,7 +1168,6 @@ ; GFX11GLISEL-LABEL: isnan_v3f16: ; GFX11GLISEL: ; %bb.0: ; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3 ; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1332,7 +1292,6 @@ ; GFX10SELDAG-LABEL: isnan_v4f16: ; GFX10SELDAG: ; %bb.0: ; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3 ; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v0, 3 ; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD @@ -1349,7 +1308,6 @@ ; GFX10GLISEL-LABEL: isnan_v4f16: ; GFX10GLISEL: ; %bb.0: ; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10GLISEL-NEXT: v_mov_b32_e32 v3, 3 ; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3 ; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 @@ -1366,7 +1324,6 @@ ; GFX11CHECK-LABEL: isnan_v4f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1 @@ -1420,7 +1377,6 @@ ; GFX10CHECK-LABEL: isnan_f16_strictfp: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1428,7 +1384,6 @@ ; GFX11CHECK-LABEL: isnan_f16_strictfp: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1476,7 +1431,6 @@ ; GFX10CHECK-LABEL: isinf_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1484,7 +1438,6 @@ ; GFX11CHECK-LABEL: isinf_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1532,7 +1485,6 @@ ; GFX10CHECK-LABEL: isfinite_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1540,7 +1492,6 @@ ; GFX11CHECK-LABEL: isfinite_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1595,7 +1546,6 @@ ; GFX10CHECK-LABEL: issubnormal_or_zero_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0xf0 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1603,7 +1553,6 @@ ; GFX11CHECK-LABEL: issubnormal_or_zero_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0xf0 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1664,7 +1613,6 @@ ; GFX10CHECK-LABEL: not_issubnormal_or_zero_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x30f ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1672,7 +1620,6 @@ ; GFX11CHECK-LABEL: not_issubnormal_or_zero_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x30f ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1724,7 +1671,6 @@ ; GFX10CHECK-LABEL: isnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x108 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1732,7 +1678,6 @@ ; GFX11CHECK-LABEL: isnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x108 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1791,7 +1736,6 @@ ; GFX10CHECK-LABEL: not_isnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x2f7 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1799,7 +1743,6 @@ ; GFX11CHECK-LABEL: not_isnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x2f7 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1869,7 +1812,6 @@ ; GFX10CHECK-LABEL: not_is_plus_normal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x2ff ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1877,7 +1819,6 @@ ; GFX11CHECK-LABEL: not_is_plus_normal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x2ff ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1947,7 +1888,6 @@ ; GFX10CHECK-LABEL: not_is_neg_normal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3f7 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1955,7 +1895,6 @@ ; GFX11CHECK-LABEL: not_is_neg_normal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3f7 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2005,7 +1944,6 @@ ; GFX10CHECK-LABEL: issubnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x90 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2013,7 +1951,6 @@ ; GFX11CHECK-LABEL: issubnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x90 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2071,7 +2008,6 @@ ; GFX10CHECK-LABEL: not_issubnormal_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x36f ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2079,7 +2015,6 @@ ; GFX11CHECK-LABEL: not_issubnormal_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x36f ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2125,7 +2060,6 @@ ; GFX10CHECK-LABEL: iszero_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x60 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2133,7 +2067,6 @@ ; GFX11CHECK-LABEL: iszero_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x60 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2192,7 +2125,6 @@ ; GFX10CHECK-LABEL: not_iszero_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39f ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2200,7 +2132,6 @@ ; GFX11CHECK-LABEL: not_iszero_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39f ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2248,7 +2179,6 @@ ; GFX10CHECK-LABEL: ispositive_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3c0 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2256,7 +2186,6 @@ ; GFX11CHECK-LABEL: ispositive_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3c0 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2318,7 +2247,6 @@ ; GFX10CHECK-LABEL: not_ispositive_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 63 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2326,7 +2254,6 @@ ; GFX11CHECK-LABEL: not_ispositive_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 63 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2384,7 +2311,6 @@ ; GFX10CHECK-LABEL: isnegative_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 60 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2392,7 +2318,6 @@ ; GFX11CHECK-LABEL: isnegative_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 60 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2448,7 +2373,6 @@ ; GFX10CHECK-LABEL: not_isnegative_f16: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3c3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2456,7 +2380,6 @@ ; GFX11CHECK-LABEL: not_isnegative_f16: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3c3 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2508,7 +2431,6 @@ ; GFX10CHECK-LABEL: iszero_or_nan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2516,7 +2438,6 @@ ; GFX11CHECK-LABEL: iszero_or_nan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2569,7 +2490,6 @@ ; GFX10CHECK-LABEL: iszero_or_nan_f_daz: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2577,7 +2497,6 @@ ; GFX11CHECK-LABEL: iszero_or_nan_f_daz: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2630,7 +2549,6 @@ ; GFX10CHECK-LABEL: iszero_or_nan_f_maybe_daz: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2638,7 +2556,6 @@ ; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2706,7 +2623,6 @@ ; GFX10CHECK-LABEL: not_iszero_or_nan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2714,7 +2630,6 @@ ; GFX11CHECK-LABEL: not_iszero_or_nan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2782,7 +2697,6 @@ ; GFX10CHECK-LABEL: not_iszero_or_nan_f_daz: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2790,7 +2704,6 @@ ; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2858,7 +2771,6 @@ ; GFX10CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2866,7 +2778,6 @@ ; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2919,7 +2830,6 @@ ; GFX10CHECK-LABEL: iszero_or_qnan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x62 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2927,7 +2837,6 @@ ; GFX11CHECK-LABEL: iszero_or_qnan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x62 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2986,7 +2895,6 @@ ; GFX10CHECK-LABEL: iszero_or_snan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x61 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2994,7 +2902,6 @@ ; GFX11CHECK-LABEL: iszero_or_snan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x61 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3072,7 +2979,6 @@ ; GFX10CHECK-LABEL: not_iszero_or_qnan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39d ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3080,7 +2986,6 @@ ; GFX11CHECK-LABEL: not_iszero_or_qnan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39d ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3154,7 +3059,6 @@ ; GFX10CHECK-LABEL: not_iszero_or_snan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39e ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3162,7 +3066,6 @@ ; GFX11CHECK-LABEL: not_iszero_or_snan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39e ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3213,7 +3116,6 @@ ; GFX10CHECK-LABEL: isinf_or_nan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x207 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3221,7 +3123,6 @@ ; GFX11CHECK-LABEL: isinf_or_nan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x207 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3270,7 +3171,6 @@ ; GFX10CHECK-LABEL: not_isinf_or_nan_f16: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3278,7 +3178,6 @@ ; GFX11CHECK-LABEL: not_isinf_or_nan_f16: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3329,7 +3228,6 @@ ; GFX10CHECK-LABEL: isfinite_or_nan_f: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1fb ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3337,7 +3235,6 @@ ; GFX11CHECK-LABEL: isfinite_or_nan_f: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1fb ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3386,7 +3283,6 @@ ; GFX10CHECK-LABEL: not_isfinite_or_nan_f: ; GFX10CHECK: ; %bb.0: ; %entry ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3394,7 +3290,6 @@ ; GFX11CHECK-LABEL: not_isfinite_or_nan_f: ; GFX11CHECK: ; %bb.0: ; %entry ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -197,7 +197,6 @@ ; GFX10CHECK-LABEL: isnan_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -205,7 +204,6 @@ ; GFX11CHECK-LABEL: isnan_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -245,7 +243,6 @@ ; GFX10CHECK-LABEL: isnan_v2f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -255,7 +252,6 @@ ; GFX11CHECK-LABEL: isnan_v2f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -303,7 +299,6 @@ ; GFX10CHECK-LABEL: isnan_v3f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -315,7 +310,6 @@ ; GFX11CHECK-LABEL: isnan_v3f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -372,7 +366,6 @@ ; GFX10CHECK-LABEL: isnan_v4f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -386,7 +379,6 @@ ; GFX11CHECK-LABEL: isnan_v4f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -451,7 +443,6 @@ ; GFX10CHECK-LABEL: isnan_v5f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -467,7 +458,6 @@ ; GFX11CHECK-LABEL: isnan_v5f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -541,7 +531,6 @@ ; GFX10CHECK-LABEL: isnan_v6f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -559,7 +548,6 @@ ; GFX11CHECK-LABEL: isnan_v6f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -641,7 +629,6 @@ ; GFX10CHECK-LABEL: isnan_v7f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -661,7 +648,6 @@ ; GFX11CHECK-LABEL: isnan_v7f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -752,7 +738,6 @@ ; GFX10CHECK-LABEL: isnan_v8f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -774,7 +759,6 @@ ; GFX11CHECK-LABEL: isnan_v8f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -915,7 +899,6 @@ ; GFX10CHECK-LABEL: isnan_v16f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3 @@ -953,7 +936,6 @@ ; GFX11CHECK-LABEL: isnan_v16f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1024,7 +1006,6 @@ ; GFX10CHECK-LABEL: isnan_f64: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1032,7 +1013,6 @@ ; GFX11CHECK-LABEL: isnan_f64: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1066,7 +1046,6 @@ ; GFX10CHECK-LABEL: isnan_f32_strictfp: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1074,7 +1053,6 @@ ; GFX11CHECK-LABEL: isnan_f32_strictfp: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1108,7 +1086,6 @@ ; GFX10CHECK-LABEL: isnan_f64_strictfp: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1116,7 +1093,6 @@ ; GFX11CHECK-LABEL: isnan_f64_strictfp: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1153,7 +1129,6 @@ ; GFX10CHECK-LABEL: isinf_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x204 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1161,7 +1136,6 @@ ; GFX11CHECK-LABEL: isinf_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1198,7 +1172,6 @@ ; GFX10CHECK-LABEL: isinf_f64: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x204 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1206,7 +1179,6 @@ ; GFX11CHECK-LABEL: isinf_f64: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1243,7 +1215,6 @@ ; GFX10CHECK-LABEL: isfinite_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x1f8 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1251,7 +1222,6 @@ ; GFX11CHECK-LABEL: isfinite_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x1f8 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1288,7 +1258,6 @@ ; GFX10CHECK-LABEL: isfinite_f64: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x1f8 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1296,7 +1265,6 @@ ; GFX11CHECK-LABEL: isfinite_f64: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x1f8 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1333,7 +1301,6 @@ ; GFX10CHECK-LABEL: isnormal_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x108 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1341,7 +1308,6 @@ ; GFX11CHECK-LABEL: isnormal_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x108 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1384,7 +1350,6 @@ ; GFX10CHECK-LABEL: isnormal_v2f64: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x108 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 0x108 @@ -1394,7 +1359,6 @@ ; GFX11CHECK-LABEL: isnormal_v2f64: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x108 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1433,7 +1397,6 @@ ; GFX10CHECK-LABEL: issubnormal_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x90 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1441,7 +1404,6 @@ ; GFX11CHECK-LABEL: issubnormal_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x90 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -1478,7 +1440,6 @@ ; GFX10CHECK-LABEL: iszero_f32: ; GFX10CHECK: ; %bb.0: ; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x60 ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1486,7 +1447,6 @@ ; GFX11CHECK-LABEL: iszero_f32: ; GFX11CHECK: ; %bb.0: ; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x60 ; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll @@ -31,7 +31,6 @@ ; GFX11-LABEL: test_ldexp_f32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.ldexp.f32.i32(float %a, i32 %b) @@ -63,7 +62,6 @@ ; GFX11-LABEL: test_ldexp_v2f32_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v4 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -99,7 +97,6 @@ ; GFX11-LABEL: test_ldexp_v3f32_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v5 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v6 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v7 @@ -139,7 +136,6 @@ ; GFX11-LABEL: test_ldexp_v4f32_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v6 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v7 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v8 @@ -150,63 +146,22 @@ } define double @test_ldexp_f64_i32(double %a, i32 %b) { -; GFX6-LABEL: test_ldexp_f64_i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_f64_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_f64_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_f64_i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_f64_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.ldexp.f64.i32(double %a, i32 %b) ret double %result } define <2 x double> @test_ldexp_v2f64_v2i32(<2 x double> %a, <2 x i32> %b) { -; GFX6-LABEL: test_ldexp_v2f64_v2i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX6-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_v2f64_v2i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX8-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_v2f64_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_v2f64_v2i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_v2f64_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GCN-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b) ret <2 x double> %result } @@ -259,7 +214,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_f16_i8: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 @@ -297,7 +251,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_f16_i8: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -333,7 +286,6 @@ ; GFX11-LABEL: test_ldexp_f16_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -379,7 +331,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_f16_i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff @@ -415,7 +366,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_f16_i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 @@ -464,7 +414,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff @@ -514,7 +463,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -564,7 +512,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 @@ -605,7 +552,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 @@ -636,5 +582,3 @@ declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0 attributes #0 = { nounwind readnone } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -669,7 +669,6 @@ ; GFX1100-LABEL: v_log_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -699,7 +698,6 @@ ; GFX1100-LABEL: v_log_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -730,7 +728,6 @@ ; GFX1100-LABEL: v_log_fneg_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -762,7 +759,6 @@ ; GFX1100-LABEL: v_log_fneg_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -793,7 +789,6 @@ ; GFX1100-LABEL: v_log_f32_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -823,7 +818,6 @@ ; GFX1100-LABEL: v_log_f32_unsafe_math_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -853,7 +847,6 @@ ; GFX1100-LABEL: v_log_f32_approx_fn_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -883,7 +876,6 @@ ; GFX1100-LABEL: v_log_f32_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -913,7 +905,6 @@ ; GFX1100-LABEL: v_log_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -943,7 +934,6 @@ ; GFX1100-LABEL: v_log_f32_afn_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -973,7 +963,6 @@ ; GFX1100-LABEL: v_log_f32_afn_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1003,7 +992,6 @@ ; GFX1100-LABEL: v_fabs_log_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1034,7 +1022,6 @@ ; GFX1100-LABEL: v_log_f32_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1064,7 +1051,6 @@ ; GFX1100-LABEL: v_log_f32_nnan: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1094,7 +1080,6 @@ ; GFX1100-LABEL: v_log_f32_nnan_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1124,7 +1109,6 @@ ; GFX1100-LABEL: v_log_f32_nnan_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1154,7 +1138,6 @@ ; GFX1100-LABEL: v_log_f32_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1184,7 +1167,6 @@ ; GFX1100-LABEL: v_log_f32_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1214,7 +1196,6 @@ ; GFX1100-LABEL: v_log_f32_nnan_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1244,7 +1225,6 @@ ; GFX1100-LABEL: v_log_f32_nnan_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1274,7 +1254,6 @@ ; GFX1100-LABEL: v_log_f32_nnan_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1304,7 +1283,6 @@ ; GFX1100-LABEL: v_log_f32_fast_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1334,7 +1312,6 @@ ; GFX1100-LABEL: v_log_f32_dynamic_mode: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1364,7 +1341,6 @@ ; GFX1100-LABEL: v_log_f32_undef: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1401,7 +1377,6 @@ ; GFX1100-SDAG-LABEL: v_log_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1410,7 +1385,6 @@ ; GFX1100-GISEL-LABEL: v_log_f32_0: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0x3f317218 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 @@ -1441,7 +1415,6 @@ ; GFX1100-LABEL: v_log_f32_from_fpext_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 @@ -1508,7 +1481,6 @@ ; GFX1100-LABEL: v_log_f32_from_fpext_math_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -1553,7 +1525,6 @@ ; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 @@ -1562,7 +1533,6 @@ ; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 @@ -1620,7 +1590,6 @@ ; GFX1100-LABEL: v_log_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 @@ -1675,7 +1644,6 @@ ; GFX1100-LABEL: v_log_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 @@ -1731,7 +1699,6 @@ ; GFX1100-LABEL: v_log_fneg_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 @@ -1788,7 +1755,6 @@ ; GFX1100-LABEL: v_log_fneg_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 @@ -1844,7 +1810,6 @@ ; GFX1100-LABEL: v_log_f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 @@ -1925,7 +1890,6 @@ ; GFX1100-LABEL: v_log_v2f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2028,7 +1992,6 @@ ; GFX1100-SDAG-LABEL: v_log_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2042,7 +2005,6 @@ ; GFX1100-GISEL-LABEL: v_log_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2152,7 +2114,6 @@ ; GFX1100-SDAG-LABEL: v_log_fneg_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2166,7 +2127,6 @@ ; GFX1100-GISEL-LABEL: v_log_fneg_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2277,7 +2237,6 @@ ; GFX1100-SDAG-LABEL: v_log_fneg_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2291,7 +2250,6 @@ ; GFX1100-GISEL-LABEL: v_log_fneg_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2380,7 +2338,6 @@ ; GFX1100-LABEL: v_log_v2f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -669,7 +669,6 @@ ; GFX1100-LABEL: v_log10_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -699,7 +698,6 @@ ; GFX1100-LABEL: v_log10_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -730,7 +728,6 @@ ; GFX1100-LABEL: v_log10_fneg_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -762,7 +759,6 @@ ; GFX1100-LABEL: v_log10_fneg_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -793,7 +789,6 @@ ; GFX1100-LABEL: v_log10_f32_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -823,7 +818,6 @@ ; GFX1100-LABEL: v_log10_f32_unsafe_math_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -853,7 +847,6 @@ ; GFX1100-LABEL: v_log10_f32_approx_fn_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -883,7 +876,6 @@ ; GFX1100-LABEL: v_log10_f32_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -913,7 +905,6 @@ ; GFX1100-LABEL: v_log10_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -943,7 +934,6 @@ ; GFX1100-LABEL: v_log10_f32_afn_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -973,7 +963,6 @@ ; GFX1100-LABEL: v_log10_f32_afn_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1003,7 +992,6 @@ ; GFX1100-LABEL: v_fabs_log10_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1034,7 +1022,6 @@ ; GFX1100-LABEL: v_log10_f32_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1064,7 +1051,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1094,7 +1080,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1124,7 +1109,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1154,7 +1138,6 @@ ; GFX1100-LABEL: v_log10_f32_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1184,7 +1167,6 @@ ; GFX1100-LABEL: v_log10_f32_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1214,7 +1196,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1244,7 +1225,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1274,7 +1254,6 @@ ; GFX1100-LABEL: v_log10_f32_nnan_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1304,7 +1283,6 @@ ; GFX1100-LABEL: v_log10_f32_fast_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1334,7 +1312,6 @@ ; GFX1100-LABEL: v_log10_f32_dynamic_mode: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1364,7 +1341,6 @@ ; GFX1100-LABEL: v_log10_f32_undef: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1401,7 +1377,6 @@ ; GFX1100-SDAG-LABEL: v_log10_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1410,7 +1385,6 @@ ; GFX1100-GISEL-LABEL: v_log10_f32_0: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0x3e9a209b ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 @@ -1441,7 +1415,6 @@ ; GFX1100-LABEL: v_log10_f32_from_fpext_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 @@ -1508,7 +1481,6 @@ ; GFX1100-LABEL: v_log10_f32_from_fpext_math_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -1553,7 +1525,6 @@ ; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 @@ -1562,7 +1533,6 @@ ; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 @@ -1620,7 +1590,6 @@ ; GFX1100-LABEL: v_log10_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 @@ -1675,7 +1644,6 @@ ; GFX1100-LABEL: v_log10_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 @@ -1731,7 +1699,6 @@ ; GFX1100-LABEL: v_log10_fneg_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 @@ -1788,7 +1755,6 @@ ; GFX1100-LABEL: v_log10_fneg_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 @@ -1844,7 +1810,6 @@ ; GFX1100-LABEL: v_log10_f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 @@ -1925,7 +1890,6 @@ ; GFX1100-LABEL: v_log10_v2f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2028,7 +1992,6 @@ ; GFX1100-SDAG-LABEL: v_log10_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2042,7 +2005,6 @@ ; GFX1100-GISEL-LABEL: v_log10_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2152,7 +2114,6 @@ ; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2166,7 +2127,6 @@ ; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2277,7 +2237,6 @@ ; GFX1100-SDAG-LABEL: v_log10_fneg_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) @@ -2291,7 +2250,6 @@ ; GFX1100-GISEL-LABEL: v_log10_fneg_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2380,7 +2338,6 @@ ; GFX1100-LABEL: v_log10_v2f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -532,7 +532,6 @@ ; GFX1100-LABEL: v_log2_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -559,7 +558,6 @@ ; GFX1100-LABEL: v_log2_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -587,7 +585,6 @@ ; GFX1100-LABEL: v_log2_fneg_fabs_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -616,7 +613,6 @@ ; GFX1100-LABEL: v_log2_fneg_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, -v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -644,7 +640,6 @@ ; GFX1100-LABEL: v_log2_f32_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -671,7 +666,6 @@ ; GFX1100-LABEL: v_log2_f32_unsafe_math_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -698,7 +692,6 @@ ; GFX1100-LABEL: v_log2_f32_approx_fn_attr: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -725,7 +718,6 @@ ; GFX1100-LABEL: v_log2_f32_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -752,7 +744,6 @@ ; GFX1100-LABEL: v_log2_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -779,7 +770,6 @@ ; GFX1100-LABEL: v_log2_f32_afn_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -806,7 +796,6 @@ ; GFX1100-LABEL: v_log2_f32_afn_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -833,7 +822,6 @@ ; GFX1100-LABEL: v_fabs_log2_f32_afn: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e64 v0, |v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -861,7 +849,6 @@ ; GFX1100-LABEL: v_log2_f32_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -888,7 +875,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -915,7 +901,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -942,7 +927,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -969,7 +953,6 @@ ; GFX1100-LABEL: v_log2_f32_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -996,7 +979,6 @@ ; GFX1100-LABEL: v_log2_f32_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1023,7 +1005,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan_ninf: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1050,7 +1031,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan_ninf_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1077,7 +1057,6 @@ ; GFX1100-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1104,7 +1083,6 @@ ; GFX1100-LABEL: v_log2_f32_fast_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1131,7 +1109,6 @@ ; GFX1100-LABEL: v_log2_f32_dynamic_mode: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1158,7 +1135,6 @@ ; GFX1100-LABEL: v_log2_f32_undef: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1191,14 +1167,12 @@ ; GFX1100-SDAG-LABEL: v_log2_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_0: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1226,7 +1200,6 @@ ; GFX1100-LABEL: v_log2_f32_from_fpext_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 @@ -1287,7 +1260,6 @@ ; GFX1100-LABEL: v_log2_f32_from_fpext_math_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -1328,14 +1300,12 @@ ; GFX1100-SDAG-LABEL: v_log2_f32_from_fpext_bf16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_from_fpext_bf16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 @@ -1387,7 +1357,6 @@ ; GFX1100-LABEL: v_log2_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1436,7 +1405,6 @@ ; GFX1100-LABEL: v_log2_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1486,7 +1454,6 @@ ; GFX1100-LABEL: v_log2_fneg_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1537,7 +1504,6 @@ ; GFX1100-LABEL: v_log2_fneg_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1587,7 +1553,6 @@ ; GFX1100-LABEL: v_log2_f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1662,7 +1627,6 @@ ; GFX1100-LABEL: v_log2_v2f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1749,7 +1713,6 @@ ; GFX1100-SDAG-LABEL: v_log2_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1761,7 +1724,6 @@ ; GFX1100-GISEL-LABEL: v_log2_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -1854,7 +1816,6 @@ ; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1866,7 +1827,6 @@ ; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -1960,7 +1920,6 @@ ; GFX1100-SDAG-LABEL: v_log2_fneg_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1972,7 +1931,6 @@ ; GFX1100-GISEL-LABEL: v_log2_fneg_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2054,7 +2012,6 @@ ; GFX1100-LABEL: v_log2_v2f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -53,7 +53,6 @@ ; GFX10-LABEL: umulo_i64_v_v: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 @@ -76,7 +75,6 @@ ; GFX11-LABEL: umulo_i64_v_v: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 @@ -179,7 +177,6 @@ ; GFX10-LABEL: smulo_i64_v_v: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 @@ -214,7 +211,6 @@ ; GFX11-LABEL: smulo_i64_v_v: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 @@ -599,7 +595,6 @@ ; GFX10-LABEL: smulo_i64_v_4: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1] ; GFX10-NEXT: v_alignbit_b32 v3, v1, v0, 30 ; GFX10-NEXT: v_ashrrev_i64 v[5:6], 2, v[4:5] @@ -612,7 +607,6 @@ ; GFX11-LABEL: smulo_i64_v_4: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1] ; GFX11-NEXT: v_alignbit_b32 v3, v1, v0, 30 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -657,7 +651,6 @@ ; GFX10-LABEL: umulo_i64_v_4: ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v7, 0x3fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v6, v0 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1] @@ -671,7 +664,6 @@ ; GFX11-LABEL: umulo_i64_v_4: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_and_b32 v7, 0x3fffffff, v1 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1] ; GFX11-NEXT: v_alignbit_b32 v3, v1, v0, 30 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -18,7 +18,6 @@ ; GFX11-LABEL: v_powi_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -48,7 +47,6 @@ ; GFX11-LABEL: v_powi_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff @@ -70,7 +68,6 @@ ; GFX11-LABEL: v_powi_0_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 0) @@ -86,7 +83,6 @@ ; GFX11-LABEL: v_powi_1_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 1) ret float %res @@ -128,7 +124,6 @@ ; GFX11-LABEL: v_powi_neg1_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_rcp_f32_e32 v2, v1 @@ -160,7 +155,6 @@ ; GFX11-LABEL: v_powi_2_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 2) @@ -205,7 +199,6 @@ ; GFX11-LABEL: v_powi_neg2_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 @@ -240,7 +233,6 @@ ; GFX11-LABEL: v_powi_4_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 @@ -261,7 +253,6 @@ ; GFX11-LABEL: v_powi_8_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 @@ -284,7 +275,6 @@ ; GFX11-LABEL: v_powi_16_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 @@ -312,7 +302,6 @@ ; GFX11-LABEL: v_powi_128_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 @@ -378,7 +367,6 @@ ; GFX11-LABEL: v_powi_neg128_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir --- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -1,6 +1,6 @@ # RUN: llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: {{^}}lo_to_lo: # GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 @@ -48,7 +48,6 @@ # GCN-LABEL: {{^}}lo_to_lo_samereg: # GCN: s_waitcnt -# GFX10-NEXT: s_waitcnt_vscnt # GCN-NEXT: s_endpgm name: lo_to_lo_samereg tracksRegLiveness: true @@ -83,7 +82,6 @@ # GCN-LABEL: {{^}}hi_to_hi_samereg: # GCN: s_waitcnt -# GFX10-NEXT: s_waitcnt_vscnt # GCN-NEXT: s_endpgm name: hi_to_hi_samereg tracksRegLiveness: true @@ -182,7 +180,6 @@ # NB: copy of undef just killed instead of expansion # GCN-LABEL: {{^}}lo_to_lo_undef: # GCN: s_waitcnt -# GFX10-NEXT: s_waitcnt_vscnt # GCN-NEXT: v_mov_b32_e32 v2, v1 # GCN-NEXT: s_endpgm name: lo_to_lo_undef diff --git a/llvm/test/CodeGen/AMDGPU/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/load-local.128.ll --- a/llvm/test/CodeGen/AMDGPU/load-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local.128.ll @@ -35,7 +35,6 @@ ; GFX10-LABEL: load_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b128 v[0:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -43,7 +42,6 @@ ; GFX11-LABEL: load_lds_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -224,7 +222,6 @@ ; GFX10-LABEL: load_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u8 v1, v0 ; GFX10-NEXT: ds_read_u8 v2, v0 offset:1 ; GFX10-NEXT: ds_read_u8 v3, v0 offset:2 @@ -266,7 +263,6 @@ ; GFX11-LABEL: load_lds_v4i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u8 v1, v0 ; GFX11-NEXT: ds_load_u8 v2, v0 offset:1 ; GFX11-NEXT: ds_load_u8 v3, v0 offset:2 @@ -397,7 +393,6 @@ ; GFX10-LABEL: load_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v1, v0 ; GFX10-NEXT: ds_read_u16 v2, v0 offset:2 ; GFX10-NEXT: ds_read_u16 v3, v0 offset:4 @@ -419,7 +414,6 @@ ; GFX11-LABEL: load_lds_v4i32_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v1, v0 ; GFX11-NEXT: ds_load_u16 v2, v0 offset:2 ; GFX11-NEXT: ds_load_u16 v3, v0 offset:4 @@ -478,7 +472,6 @@ ; GFX10-LABEL: load_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3 @@ -488,7 +481,6 @@ ; GFX11-LABEL: load_lds_v4i32_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1 ; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3 @@ -528,7 +520,6 @@ ; GFX10-LABEL: load_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -536,7 +527,6 @@ ; GFX11-LABEL: load_lds_v4i32_align8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -574,7 +564,6 @@ ; GFX10-LABEL: load_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b128 v[0:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -582,7 +571,6 @@ ; GFX11-LABEL: load_lds_v4i32_align16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/load-local.96.ll --- a/llvm/test/CodeGen/AMDGPU/load-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local.96.ll @@ -35,7 +35,6 @@ ; GFX10-LABEL: load_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b96 v[0:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -43,7 +42,6 @@ ; GFX11-LABEL: load_lds_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -189,7 +187,6 @@ ; GFX10-LABEL: load_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u8 v1, v0 ; GFX10-NEXT: ds_read_u8 v2, v0 offset:1 ; GFX10-NEXT: ds_read_u8 v3, v0 offset:2 @@ -222,7 +219,6 @@ ; GFX11-LABEL: load_lds_v3i32_align1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u8 v1, v0 ; GFX11-NEXT: ds_load_u8 v2, v0 offset:1 ; GFX11-NEXT: ds_load_u8 v3, v0 offset:2 @@ -327,7 +323,6 @@ ; GFX10-LABEL: load_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_u16 v1, v0 ; GFX10-NEXT: ds_read_u16 v2, v0 offset:2 ; GFX10-NEXT: ds_read_u16 v3, v0 offset:4 @@ -345,7 +340,6 @@ ; GFX11-LABEL: load_lds_v3i32_align2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_u16 v1, v0 ; GFX11-NEXT: ds_load_u16 v2, v0 offset:2 ; GFX11-NEXT: ds_load_u16 v3, v0 offset:4 @@ -398,7 +392,6 @@ ; GFX10-LABEL: load_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8 @@ -408,7 +401,6 @@ ; GFX11-LABEL: load_lds_v3i32_align4: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 @@ -452,7 +444,6 @@ ; GFX10-LABEL: load_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: ds_read_b64 v[0:1], v0 ; GFX10-NEXT: ds_read_b32 v2, v2 offset:8 @@ -462,7 +453,6 @@ ; GFX11-LABEL: load_lds_v3i32_align8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: ds_load_b64 v[0:1], v0 ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 @@ -502,7 +492,6 @@ ; GFX10-LABEL: load_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b96 v[0:2], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -510,7 +499,6 @@ ; GFX11-LABEL: load_lds_v3i32_align16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll --- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -13,7 +13,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -63,7 +62,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] @@ -121,7 +119,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 @@ -178,7 +175,6 @@ ; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -212,7 +208,6 @@ ; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -252,7 +247,6 @@ ; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -286,7 +280,6 @@ ; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -339,7 +332,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -395,7 +387,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -455,7 +446,6 @@ ; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX11-NEXT: global_store_b16 v[0:1], v3, off dlc diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -15,7 +15,6 @@ ; GFX1100-LABEL: mixlo_simple: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -61,7 +60,6 @@ ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -116,7 +114,6 @@ ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -168,7 +165,6 @@ ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -230,7 +226,6 @@ ; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -291,7 +286,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] @@ -395,7 +389,6 @@ ; GFX1100-LABEL: v_mad_mix_v3f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] ; GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -544,7 +537,6 @@ ; GFX1100-LABEL: v_mad_mix_v4f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] ; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -731,7 +723,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp @@ -854,7 +845,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -943,7 +933,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1047,7 +1036,6 @@ ; GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1241,7 +1229,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] @@ -1306,7 +1293,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, v3 @@ -1399,7 +1385,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp @@ -1464,7 +1449,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1] ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1564,7 +1548,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1636,7 +1619,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1712,7 +1694,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp @@ -1804,7 +1785,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp @@ -1894,7 +1874,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp @@ -2011,7 +1990,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -15,7 +15,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -65,7 +64,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -121,7 +119,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -174,7 +171,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -282,7 +278,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32_shuffle: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -368,7 +363,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -428,7 +422,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -479,7 +472,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -531,7 +523,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -578,7 +569,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -626,7 +616,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -674,7 +663,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -727,7 +715,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] @@ -764,7 +751,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] @@ -801,7 +787,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] @@ -838,7 +823,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] @@ -882,7 +866,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] @@ -919,7 +902,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] @@ -968,7 +950,6 @@ ; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] @@ -1005,7 +986,6 @@ ; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000 ; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] @@ -1053,7 +1033,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32_f32imm1: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: s_mov_b32 s0, 1.0 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] @@ -1154,7 +1133,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: s_mov_b32 s0, 0x3e230000 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] @@ -1260,7 +1238,6 @@ ; GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: s_mov_b32 s0, 0.15915494 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] @@ -1364,7 +1341,6 @@ ; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1419,7 +1395,6 @@ ; GFX1100-LABEL: no_mix_simple: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1454,7 +1429,6 @@ ; GFX1100-LABEL: no_mix_simple_fabs: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1494,7 +1468,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1548,7 +1521,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1598,7 +1570,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2 @@ -1665,7 +1636,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1726,7 +1696,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1777,7 +1746,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1825,7 +1793,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -1891,7 +1858,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0 @@ -1955,7 +1921,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -2011,7 +1976,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -2078,7 +2042,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -2145,7 +2108,6 @@ ; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/mad.u16.ll b/llvm/test/CodeGen/AMDGPU/mad.u16.ll --- a/llvm/test/CodeGen/AMDGPU/mad.u16.ll +++ b/llvm/test/CodeGen/AMDGPU/mad.u16.ll @@ -117,14 +117,12 @@ ; GFX10-LABEL: v_mad_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_mad_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg0, %arg1 @@ -148,7 +146,6 @@ ; GFX10-LABEL: v_mad_u16_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -156,7 +153,6 @@ ; GFX11-LABEL: v_mad_u16_zext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -185,7 +181,6 @@ ; GFX10-LABEL: v_mad_u16_zext64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -194,7 +189,6 @@ ; GFX11-LABEL: v_mad_u16_zext64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -31,7 +31,6 @@ ; GFX11-LABEL: mad_i64_i32_sextops: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] @@ -68,7 +67,6 @@ ; GFX11-LABEL: mad_i64_i32_sextops_commute: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] @@ -105,7 +103,6 @@ ; GFX11-LABEL: mad_u64_u32_zextops: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] @@ -142,7 +139,6 @@ ; GFX11-LABEL: mad_u64_u32_zextops_commute: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] @@ -243,7 +239,6 @@ ; GFX11-LABEL: mad_i64_i32_sextops_i32_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0 ; GFX11-NEXT: v_mov_b32_e32 v8, 0 ; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0 @@ -304,7 +299,6 @@ ; GFX11-LABEL: mad_i64_i32_sextops_i32_i63: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] @@ -349,7 +343,6 @@ ; GFX11-LABEL: mad_i64_i32_sextops_i31_i63: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfe_i32 v4, v1, 0, 31 ; GFX11-NEXT: v_bfe_i32 v5, v0, 0, 31 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -398,7 +391,6 @@ ; GFX11-LABEL: mad_i64_i32_extops_i32_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] @@ -439,7 +431,6 @@ ; GFX11-LABEL: mad_u64_u32_bitops: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5] @@ -486,7 +477,6 @@ ; GFX11-LABEL: mad_u64_u32_bitops_lhs_mask_small: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: v_mov_b32_e32 v6, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -539,7 +529,6 @@ ; GFX11-LABEL: mad_u64_u32_bitops_rhs_mask_small: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5] @@ -579,7 +568,6 @@ ; GFX11-LABEL: mad_i64_i32_bitops: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5] @@ -619,7 +607,6 @@ ; GFX11-LABEL: mad_i64_i32_unpack_i64ops: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 @@ -739,7 +726,6 @@ ; GFX11-LABEL: mad_i64_i32_twice: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3] ; GFX11-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -804,7 +790,6 @@ ; GFX11-LABEL: mad_i64_i32_thrice: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2 @@ -865,7 +850,6 @@ ; GFX11-LABEL: mad_i64_i32_secondary_use: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 @@ -922,7 +906,6 @@ ; GFX11-LABEL: mad_i48_i48: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll --- a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll @@ -313,7 +313,6 @@ ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2 ; GFX10-NEXT: flat_store_dword v[0:1], v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvv_multiuse: @@ -322,7 +321,6 @@ ; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2 ; GFX11-NEXT: flat_store_b32 v[0:1], v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -205,7 +205,6 @@ ; GCN-SCRATCH-LABEL: mubuf_clause: ; GCN-SCRATCH: ; %bb.0: ; %bb ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31 ; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2 ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18 @@ -229,7 +228,6 @@ ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v18, v[10:13], off ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v19, v[14:17], off -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -322,7 +320,6 @@ ; GCN-SCRATCH-LABEL: load_global_d16_hi: ; GCN-SCRATCH: ; %bb.0: ; %entry ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: v_mov_b32_e32 v5, v2 ; GCN-SCRATCH-NEXT: s_clause 0x1 ; GCN-SCRATCH-NEXT: global_load_short_d16_hi v5, v[0:1], off @@ -331,7 +328,6 @@ ; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v5, off ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128 -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 32 @@ -365,7 +361,6 @@ ; GCN-SCRATCH-LABEL: load_global_d16_lo: ; GCN-SCRATCH: ; %bb.0: ; %entry ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: v_mov_b32_e32 v5, v2 ; GCN-SCRATCH-NEXT: s_clause 0x1 ; GCN-SCRATCH-NEXT: global_load_short_d16 v5, v[0:1], off @@ -374,7 +369,6 @@ ; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v5, off ; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128 -; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 32 diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll --- a/llvm/test/CodeGen/AMDGPU/minmax.ll +++ b/llvm/test/CodeGen/AMDGPU/minmax.ll @@ -6,7 +6,6 @@ ; GFX11-LABEL: test_minmax_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -46,7 +45,6 @@ ; GFX11-LABEL: test_minmax_commuted_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -58,7 +56,6 @@ ; GFX11-LABEL: test_maxmin_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b) @@ -70,7 +67,6 @@ ; GFX11-LABEL: test_maxmin_commuted_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b) @@ -82,10 +78,8 @@ ; GFX11-LABEL: test_smed3_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_med3_i32 v2, v2, v3, v4 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y) %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y) @@ -99,7 +93,6 @@ ; GFX11-LABEL: test_minmax_u32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -139,7 +132,6 @@ ; GFX11-LABEL: test_minmax_commuted_u32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -151,7 +143,6 @@ ; GFX11-LABEL: test_maxmin_u32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b) @@ -163,7 +154,6 @@ ; GFX11-LABEL: test_maxmin_commuted_u32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b) @@ -175,10 +165,8 @@ ; GFX11-LABEL: test_umed3_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_med3_u32 v2, v2, v3, v4 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y) %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y) @@ -192,7 +180,6 @@ ; SDAG-LABEL: test_minmax_f32_ieee_true: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 ; SDAG-NEXT: v_max_f32_e32 v2, v2, v2 ; SDAG-NEXT: v_maxmin_f32 v0, v0, v1, v2 @@ -201,7 +188,6 @@ ; GISEL-LABEL: test_minmax_f32_ieee_true: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GISEL-NEXT: v_max_f32_e32 v2, v2, v2 ; GISEL-NEXT: v_maxmin_f32 v0, v0, v1, v2 @@ -251,7 +237,6 @@ ; SDAG-LABEL: test_maxmin_f32_ieee_true: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 ; SDAG-NEXT: v_max_f32_e32 v2, v2, v2 ; SDAG-NEXT: v_minmax_f32 v0, v0, v1, v2 @@ -260,7 +245,6 @@ ; GISEL-LABEL: test_maxmin_f32_ieee_true: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 ; GISEL-NEXT: v_max_f32_e32 v2, v2, v2 ; GISEL-NEXT: v_minmax_f32 v0, v0, v1, v2 @@ -284,10 +268,8 @@ ; GFX11-LABEL: test_med3_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_med3_f32 v2, v2, v3, v4 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp0 = call float @llvm.minnum.f32(float %x, float %y) %tmp1 = call float @llvm.maxnum.f32(float %x, float %y) @@ -337,7 +319,6 @@ ; SDAG-LABEL: test_minmax_commuted_f16_ieee_true: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-NEXT: v_max_f16_e32 v1, v1, v1 ; SDAG-NEXT: v_max_f16_e32 v0, v0, v0 ; SDAG-NEXT: v_max_f16_e32 v2, v2, v2 @@ -347,7 +328,6 @@ ; GISEL-LABEL: test_minmax_commuted_f16_ieee_true: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-NEXT: v_max_f16_e32 v0, v0, v0 ; GISEL-NEXT: v_max_f16_e32 v1, v1, v1 ; GISEL-NEXT: v_max_f16_e32 v2, v2, v2 @@ -372,7 +352,6 @@ ; SDAG-LABEL: test_maxmin_commuted_f16_ieee_true: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-NEXT: v_max_f16_e32 v1, v1, v1 ; SDAG-NEXT: v_max_f16_e32 v0, v0, v0 ; SDAG-NEXT: v_max_f16_e32 v2, v2, v2 @@ -382,7 +361,6 @@ ; GISEL-LABEL: test_maxmin_commuted_f16_ieee_true: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-NEXT: v_max_f16_e32 v0, v0, v0 ; GISEL-NEXT: v_max_f16_e32 v1, v1, v1 ; GISEL-NEXT: v_max_f16_e32 v2, v2, v2 @@ -397,10 +375,8 @@ ; GFX11-LABEL: test_med3_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_med3_f16 v2, v2, v3, v4 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %tmp0 = call half @llvm.minnum.f16(half %x, half %y) %tmp1 = call half @llvm.maxnum.f16(half %x, half %y) diff --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll --- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll +++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll @@ -150,7 +150,6 @@ ; GFX10-LABEL: nonkernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: ds_write_b32 v0, v0 offset:8 @@ -173,7 +172,6 @@ ; G_GFX10-LABEL: nonkernel: ; G_GFX10: ; %bb.0: ; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; G_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; G_GFX10-NEXT: v_mov_b32_e32 v2, 0 ; G_GFX10-NEXT: v_mov_b32_e32 v3, 8 ; G_GFX10-NEXT: v_mov_b32_e32 v0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -38,7 +38,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr: ; GFX1010_W32: ; %bb.0: ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 @@ -64,7 +63,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr: ; GFX1010_W64: ; %bb.0: ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 @@ -90,7 +88,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr: ; GFX1100_W32: ; %bb.0: ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo ; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 @@ -117,7 +114,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr: ; GFX1100_W64: ; %bb.0: ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 @@ -281,7 +277,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 @@ -328,7 +323,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 @@ -375,7 +369,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo ; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 @@ -425,7 +418,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 @@ -729,7 +721,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr_outside_entry: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: ;;#ASMSTART ; GFX1010_W32-NEXT: s_mov_b32 s4, 17 ; GFX1010_W32-NEXT: ;;#ASMEND @@ -786,7 +777,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr_outside_entry: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: ;;#ASMSTART ; GFX1010_W64-NEXT: s_mov_b32 s4, 17 ; GFX1010_W64-NEXT: ;;#ASMEND @@ -843,7 +833,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr_outside_entry: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: ;;#ASMSTART ; GFX1100_W32-NEXT: s_mov_b32 s4, 17 ; GFX1100_W32-NEXT: ;;#ASMEND @@ -904,7 +893,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr_outside_entry: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: ;;#ASMSTART ; GFX1100_W64-NEXT: s_mov_b32 s4, 17 ; GFX1100_W64-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -37,7 +37,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr: ; GFX1010_W32: ; %bb.0: ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 @@ -63,7 +62,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr: ; GFX1010_W64: ; %bb.0: ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 @@ -89,7 +87,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr: ; GFX1100_W32: ; %bb.0: ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo ; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 @@ -116,7 +113,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr: ; GFX1100_W64: ; %bb.0: ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 @@ -292,7 +288,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 @@ -339,7 +334,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 @@ -386,7 +380,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo ; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 @@ -436,7 +429,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 @@ -764,7 +756,6 @@ ; GFX1010_W32-LABEL: mubuf_vgpr_outside_entry: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: ;;#ASMSTART ; GFX1010_W32-NEXT: s_mov_b32 s4, 17 ; GFX1010_W32-NEXT: ;;#ASMEND @@ -821,7 +812,6 @@ ; GFX1010_W64-LABEL: mubuf_vgpr_outside_entry: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: ;;#ASMSTART ; GFX1010_W64-NEXT: s_mov_b32 s4, 17 ; GFX1010_W64-NEXT: ;;#ASMEND @@ -878,7 +868,6 @@ ; GFX1100_W32-LABEL: mubuf_vgpr_outside_entry: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: ;;#ASMSTART ; GFX1100_W32-NEXT: s_mov_b32 s4, 17 ; GFX1100_W32-NEXT: ;;#ASMEND @@ -939,7 +928,6 @@ ; GFX1100_W64-LABEL: mubuf_vgpr_outside_entry: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: ;;#ASMSTART ; GFX1100_W64-NEXT: s_mov_b32 s4, 17 ; GFX1100_W64-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -20,7 +20,6 @@ ; GFX10-LABEL: flat_inst_valu_offset_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] @@ -30,7 +29,6 @@ ; GFX11-LABEL: flat_inst_valu_offset_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -50,7 +48,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -60,7 +57,6 @@ ; GFX11-LABEL: flat_inst_valu_offset_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -68,7 +64,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x7ff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -93,7 +88,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -103,7 +97,6 @@ ; GFX11-LABEL: flat_inst_valu_offset_12bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -111,7 +104,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -138,7 +130,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -148,7 +139,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 @@ -170,7 +160,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -183,7 +172,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -211,7 +199,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -221,7 +208,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -244,7 +230,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf800 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -258,7 +243,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf800 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -287,7 +271,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -297,7 +280,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -320,7 +302,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -334,7 +315,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -363,7 +343,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -373,7 +352,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -396,7 +374,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -410,7 +387,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -437,7 +413,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -447,7 +422,6 @@ ; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -455,7 +429,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -482,7 +455,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -492,7 +464,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 @@ -514,7 +485,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -527,7 +497,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -555,7 +524,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -565,7 +533,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 @@ -587,7 +554,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -600,7 +566,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -628,7 +593,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -638,7 +602,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -661,7 +624,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -675,7 +637,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -704,7 +665,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -714,7 +674,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -737,7 +696,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -751,7 +709,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -780,7 +737,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -790,7 +746,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -813,7 +768,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -827,7 +781,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -857,7 +810,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -867,7 +819,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 @@ -890,7 +841,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -904,7 +854,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -934,7 +883,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -944,7 +892,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 @@ -967,7 +914,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -981,7 +927,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1011,7 +956,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1021,7 +965,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 @@ -1044,7 +987,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1058,7 +1000,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1088,7 +1029,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1098,7 +1038,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1121,7 +1060,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1135,7 +1073,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1165,7 +1102,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1175,7 +1111,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 @@ -1198,7 +1133,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1212,7 +1146,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1242,7 +1175,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1252,7 +1184,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1275,7 +1206,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1289,7 +1219,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1320,7 +1249,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1330,7 +1258,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1353,7 +1280,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1367,7 +1293,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1398,7 +1323,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1408,7 +1332,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1431,7 +1354,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1445,7 +1367,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1476,7 +1397,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1486,7 +1406,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1509,7 +1428,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1523,7 +1441,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1554,7 +1471,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1564,7 +1480,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1587,7 +1502,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1601,7 +1515,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1632,7 +1545,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1642,7 +1554,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1665,7 +1576,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1679,7 +1589,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1710,7 +1619,6 @@ ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] @@ -1720,7 +1628,6 @@ ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] @@ -1743,7 +1650,6 @@ ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1757,7 +1663,6 @@ ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -20,7 +20,6 @@ ; GFX10-LABEL: global_inst_valu_offset_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -28,7 +27,6 @@ ; GFX11-LABEL: global_inst_valu_offset_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -48,7 +46,6 @@ ; GFX10-LABEL: global_inst_valu_offset_11bit_max: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -56,7 +53,6 @@ ; GFX11-LABEL: global_inst_valu_offset_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -76,7 +72,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -89,7 +84,6 @@ ; GFX11-LABEL: global_inst_valu_offset_12bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -97,7 +91,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -124,7 +117,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -137,7 +129,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -160,7 +151,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -170,7 +160,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 @@ -192,7 +181,6 @@ ; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -200,7 +188,6 @@ ; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -220,7 +207,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -234,7 +220,6 @@ ; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -242,7 +227,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -270,7 +254,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -284,7 +267,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -308,7 +290,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -318,7 +299,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -340,7 +320,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -353,7 +332,6 @@ ; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -361,7 +339,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -388,7 +365,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -401,7 +377,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -424,7 +399,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -434,7 +408,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 @@ -461,7 +434,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -474,7 +446,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -497,7 +468,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -507,7 +477,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 @@ -529,7 +498,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_11bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -543,7 +511,6 @@ ; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -551,7 +518,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_11bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -579,7 +545,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -593,7 +558,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -617,7 +581,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -627,7 +590,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -655,7 +617,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -669,7 +630,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -693,7 +653,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -703,7 +662,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -732,7 +690,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -746,7 +703,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -770,7 +726,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -780,7 +735,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 @@ -809,7 +763,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -823,7 +776,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -847,7 +799,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -857,7 +808,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 @@ -886,7 +836,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -900,7 +849,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -924,7 +872,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -934,7 +881,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 @@ -963,7 +909,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -977,7 +922,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1001,7 +945,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -1011,7 +954,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -1040,7 +982,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1054,7 +995,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1078,7 +1018,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -1088,7 +1027,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 @@ -1117,7 +1055,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1131,7 +1068,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1155,7 +1091,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -1165,7 +1100,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -1194,7 +1128,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1208,7 +1141,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1233,7 +1165,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -1243,7 +1174,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2049 @@ -1272,7 +1202,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1286,7 +1215,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1311,7 +1239,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -1321,7 +1248,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 @@ -1350,7 +1276,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1364,7 +1289,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1389,7 +1313,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -1399,7 +1322,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 @@ -1428,7 +1350,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1442,7 +1363,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1467,7 +1387,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -1477,7 +1396,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off @@ -1506,7 +1424,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1520,7 +1437,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1545,7 +1461,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -1555,7 +1470,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 @@ -1584,7 +1498,6 @@ ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -1598,7 +1511,6 @@ ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1623,7 +1535,6 @@ ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off @@ -1633,7 +1544,6 @@ ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll --- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll +++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll @@ -6,12 +6,10 @@ ; GFX10-LABEL: shuffle6766: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x6060706 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle6766: @@ -35,13 +33,11 @@ ; GFX10-LABEL: shuffle3744: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v6, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x307 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle3744: @@ -66,12 +62,10 @@ ; GFX10-LABEL: shuffle4445: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040404 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle4445: @@ -95,12 +89,10 @@ ; GFX10-LABEL: shuffle0101: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle0101: @@ -124,13 +116,11 @@ ; GFX10-LABEL: shuffle1004: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v6, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x40405 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle1004: @@ -157,14 +147,12 @@ ; GFX10-LABEL: shuffle7533: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_load_dword v6, v[0:1] ; GFX10-NEXT: flat_load_dword v7, v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v7, v6, 0x3030507 ; GFX10-NEXT: flat_store_dword v[4:5], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle7533: @@ -189,13 +177,11 @@ ; GFX10-LABEL: shuffle7767: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_load_dword v0, v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060707 ; GFX10-NEXT: flat_store_dword v[4:5], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle7767: @@ -219,7 +205,6 @@ ; GFX10-LABEL: shuffle0554: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b32 v0, v0 ; GFX10-NEXT: ds_read_b32 v1, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -250,7 +235,6 @@ ; GFX10-LABEL: shuffle2127: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_read_b32 v0, v0 ; GFX10-NEXT: ds_read_b32 v1, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -281,14 +265,12 @@ ; GFX10-LABEL: shuffle5047: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen ; GFX10-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v4, v3, 0x7040005 ; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle5047: @@ -313,13 +295,11 @@ ; GFX10-LABEL: shuffle3546: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v6, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x2000107 ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle3546: @@ -345,12 +325,10 @@ ; GFX10-LABEL: shuffle7330ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x4070706 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle7330ud2: @@ -373,12 +351,10 @@ ; GFX10-LABEL: shuffle5341ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040706 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle5341ud2: @@ -401,12 +377,10 @@ ; GFX10-LABEL: shuffle6106ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle6106ud2: @@ -430,12 +404,10 @@ ; GFX10-LABEL: shuffle4327ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060706 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle4327ud2: @@ -458,12 +430,10 @@ ; GFX10-LABEL: shuffle3263ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060607 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle3263ud2: @@ -486,12 +456,10 @@ ; GFX10-LABEL: shuffle2763ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060706 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle2763ud2: @@ -514,12 +482,10 @@ ; GFX10-LABEL: shuffle1327ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060705 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle1327ud2: @@ -542,12 +508,10 @@ ; GFX10-LABEL: shuffle0605ud2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504 ; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shuffle0605ud2: @@ -570,14 +534,12 @@ ; GFX10-LABEL: insertUsesOr: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: global_store_dword v[5:6], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: insertUsesOr: @@ -603,7 +565,6 @@ ; GFX10-LABEL: addUsesOr: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -621,7 +582,6 @@ ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: addUsesOr: @@ -714,7 +674,6 @@ ; GFX10-LABEL: add: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -734,7 +693,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: add: @@ -768,7 +726,6 @@ ; GFX10-LABEL: add_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -788,7 +745,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: global_store_dword v[5:6], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: add_div: @@ -826,7 +782,6 @@ ; GFX10-LABEL: add_store: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v9, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -844,7 +799,6 @@ ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: add_store: @@ -881,7 +835,6 @@ ; GFX10-LABEL: add_store_div_16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -905,7 +858,6 @@ ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: add_store_div_16: @@ -951,7 +903,6 @@ ; GFX10-LABEL: add_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -979,7 +930,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x10705 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: add_store_div: @@ -1025,7 +975,6 @@ ; GFX10-LABEL: and_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1049,7 +998,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x5070006 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: and_store_div: @@ -1097,7 +1045,6 @@ ; GFX10-LABEL: ashr_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1122,7 +1069,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ashr_store_div: @@ -1171,7 +1117,6 @@ ; GFX10-LABEL: bc_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1184,7 +1129,6 @@ ; GFX10-NEXT: v_perm_b32 v0, v9, v4, 0x7060104 ; GFX10-NEXT: global_store_dword v[7:8], v0, off ; GFX10-NEXT: global_store_dword v[5:6], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: bc_store_div: @@ -1222,7 +1166,6 @@ ; GFX10-LABEL: eve_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1237,7 +1180,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v5, v4, 0x1020305 ; GFX10-NEXT: global_store_byte v[9:10], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: eve_store_div: @@ -1277,7 +1219,6 @@ ; GFX10-LABEL: ive_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v9, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, 2, v9 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v9 @@ -1300,7 +1241,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ive_store_div: @@ -1347,7 +1287,6 @@ ; GFX10-LABEL: lhsr_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1370,7 +1309,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x1030707 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: lhsr_store_div: @@ -1418,7 +1356,6 @@ ; GFX10-LABEL: mul_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1445,7 +1382,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x2000504 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: mul_store_div: @@ -1492,7 +1428,6 @@ ; GFX10-LABEL: or_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -1515,7 +1450,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x2010005 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: or_store_div: @@ -1562,7 +1496,6 @@ ; GFX10-LABEL: sdiv_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -1635,7 +1568,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: sdiv_store_div: @@ -1733,7 +1665,6 @@ ; GFX10-LABEL: sext_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -1754,7 +1685,6 @@ ; GFX10-NEXT: v_perm_b32 v2, v9, v4, 0x3010707 ; GFX10-NEXT: global_store_dwordx2 v[7:8], v[0:1], off ; GFX10-NEXT: global_store_dword v[5:6], v2, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: sext_store_div: @@ -1799,7 +1729,6 @@ ; GFX10-LABEL: shl_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -1822,7 +1751,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: shl_store_div: @@ -1870,7 +1798,6 @@ ; GFX10-LABEL: sitofp_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -1894,7 +1821,6 @@ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 ; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off ; GFX10-NEXT: global_store_dword v[5:6], v4, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: sitofp_store_div: @@ -1943,7 +1869,6 @@ ; GFX10-LABEL: srem_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2027,7 +1952,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x2070306 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: srem_store_div: @@ -2136,7 +2060,6 @@ ; GFX10-LABEL: sub_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2162,7 +2085,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: sub_store_div: @@ -2210,7 +2132,6 @@ ; GFX10-LABEL: sv_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -2222,7 +2143,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v4, v5, 0x50705 ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: sv_store_div: @@ -2259,7 +2179,6 @@ ; GFX10-LABEL: trunc_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -2283,7 +2202,6 @@ ; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX10-NEXT: global_store_byte v[7:8], v0, off ; GFX10-NEXT: global_store_dword v[5:6], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: trunc_store_div: @@ -2331,7 +2249,6 @@ ; GFX10-LABEL: udiv: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2384,7 +2301,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: udiv: @@ -2462,7 +2378,6 @@ ; GFX10-LABEL: uitofp_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2480,7 +2395,6 @@ ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5020104 ; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off ; GFX10-NEXT: global_store_dword v[5:6], v4, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: uitofp_store_div: @@ -2523,7 +2437,6 @@ ; GFX10-LABEL: urem_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2584,7 +2497,6 @@ ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off ; GFX10-NEXT: global_store_dword v[7:8], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: urem_store_div: @@ -2670,7 +2582,6 @@ ; GFX10-LABEL: xor_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -2696,7 +2607,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x5060307 ; GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: xor_store_div: @@ -2747,7 +2657,6 @@ ; GFX10-LABEL: zext_store_div: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 @@ -2768,7 +2677,6 @@ ; GFX10-NEXT: v_perm_b32 v1, v3, v10, 0x5040100 ; GFX10-NEXT: global_store_dwordx2 v[7:8], v[0:1], off ; GFX10-NEXT: global_store_dword v[5:6], v2, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: zext_store_div: diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll @@ -21,14 +21,12 @@ ; GFX10-LABEL: shl_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shl_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = shl i16 %x, %y @@ -51,14 +49,12 @@ ; GFX10-LABEL: lshr_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: lshr_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = lshr i16 %x, %y @@ -81,14 +77,12 @@ ; GFX10-LABEL: ashr_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: ashr_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = ashr i16 %x, %y @@ -111,14 +105,12 @@ ; GFX10-LABEL: add_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: add_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = add i16 %x, %y @@ -141,14 +133,12 @@ ; GFX10-LABEL: sub_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: sub_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = sub i16 %x, %y @@ -171,14 +161,12 @@ ; GFX10-LABEL: mul_lo_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: mul_lo_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = mul i16 %x, %y @@ -201,14 +189,12 @@ ; GFX10-LABEL: min_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: min_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_u16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp ule i16 %x, %y @@ -232,14 +218,12 @@ ; GFX10-LABEL: min_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_i16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: min_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_i16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp sle i16 %x, %y @@ -263,14 +247,12 @@ ; GFX10-LABEL: max_u16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: max_u16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_u16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp uge i16 %x, %y @@ -294,14 +276,12 @@ ; GFX10-LABEL: max_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_i16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: max_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_i16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp sge i16 %x, %y @@ -325,7 +305,6 @@ ; GFX10-LABEL: shl_i16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -333,7 +312,6 @@ ; GFX11-LABEL: shl_i16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -358,7 +336,6 @@ ; GFX10-LABEL: lshr_i16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -366,7 +343,6 @@ ; GFX11-LABEL: lshr_i16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -391,7 +367,6 @@ ; GFX10-LABEL: ashr_i16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -399,7 +374,6 @@ ; GFX11-LABEL: ashr_i16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -424,7 +398,6 @@ ; GFX10-LABEL: add_u16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -432,7 +405,6 @@ ; GFX11-LABEL: add_u16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -457,7 +429,6 @@ ; GFX10-LABEL: sub_u16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -465,7 +436,6 @@ ; GFX11-LABEL: sub_u16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -490,7 +460,6 @@ ; GFX10-LABEL: mul_lo_u16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -498,7 +467,6 @@ ; GFX11-LABEL: mul_lo_u16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -523,7 +491,6 @@ ; GFX10-LABEL: min_u16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_u16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -531,7 +498,6 @@ ; GFX11-LABEL: min_u16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_u16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -557,7 +523,6 @@ ; GFX10-LABEL: min_i16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_min_i16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -565,7 +530,6 @@ ; GFX11-LABEL: min_i16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_min_i16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -591,7 +555,6 @@ ; GFX10-LABEL: max_u16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_u16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -599,7 +562,6 @@ ; GFX11-LABEL: max_u16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_u16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -625,7 +587,6 @@ ; GFX10-LABEL: max_i16_zext_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_max_i16 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -633,7 +594,6 @@ ; GFX11-LABEL: max_i16_zext_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_max_i16 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -659,7 +619,6 @@ ; GFX10-LABEL: zext_fadd_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -667,7 +626,6 @@ ; GFX11-LABEL: zext_fadd_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -694,7 +652,6 @@ ; GFX10-LABEL: zext_fma_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -702,7 +659,6 @@ ; GFX11-LABEL: zext_fma_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fmac_f16_e32 v2, v0, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -729,7 +685,6 @@ ; GFX10-LABEL: zext_div_fixup_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_div_fixup_f16 v0, v0, v1, v2 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -737,7 +692,6 @@ ; GFX11-LABEL: zext_div_fixup_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_div_fixup_f16 v0, v0, v1, v2 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -766,7 +720,6 @@ ; GFX10-LABEL: zext_fptrunc_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -774,7 +727,6 @@ ; GFX11-LABEL: zext_fptrunc_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -809,7 +761,6 @@ ; GFX10-LABEL: zext_fptrunc_fma_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -817,7 +768,6 @@ ; GFX11-LABEL: zext_fptrunc_fma_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll --- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/ptrmask.ll @@ -14,7 +14,6 @@ ; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -33,7 +32,6 @@ ; GFX10-LABEL: v_ptrmask_global_variable_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -41,7 +39,6 @@ ; GFX11-LABEL: v_ptrmask_global_variable_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %ptr, i32 %mask) @@ -59,7 +56,6 @@ ; GFX10-LABEL: v_ptrmask_global_variable_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -67,7 +63,6 @@ ; GFX11-LABEL: v_ptrmask_global_variable_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -85,7 +80,6 @@ ; GFX10PLUS-LABEL: v_ptrmask_local_variable_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask) @@ -102,7 +96,6 @@ ; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask) @@ -119,14 +112,12 @@ ; GFX10-LABEL: v_ptrmask_local_variable_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_ptrmask_local_variable_i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll --- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll @@ -34,7 +34,6 @@ ; GFX10-LABEL: v_mul_add_1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i32 %y, 1 @@ -66,7 +65,6 @@ ; GFX10-LABEL: v_mul_add_1_i32_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i32 %y, 1 @@ -98,7 +96,6 @@ ; GFX10-LABEL: v_mul_add_x_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i32 %x, %y @@ -131,7 +128,6 @@ ; GFX10-LABEL: v_mul_sub_1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -165,7 +161,6 @@ ; GFX10-LABEL: v_mul_sub_1_i32_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -199,7 +194,6 @@ ; GFX10-LABEL: v_mul_sub_x_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -233,7 +227,6 @@ ; GFX10-LABEL: v_mul_add_2_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 2, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -267,7 +260,6 @@ ; GFX10-LABEL: v_mul_sub_2_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -2, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -301,7 +293,6 @@ ; GFX10-LABEL: v_mul_add_65_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x41, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -335,7 +326,6 @@ ; GFX10-LABEL: v_mul_sub_65_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -366,7 +356,6 @@ ; GFX10-LABEL: v_mul_add_1_i24_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i24 %y, 1 @@ -399,7 +388,6 @@ ; GFX10-LABEL: v_mul_sub_1_i24_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -430,7 +418,6 @@ ; GFX10-LABEL: v_add_mul_i24_zext_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i24 %x, %y @@ -460,7 +447,6 @@ ; GFX10-LABEL: v_mul_add_1_i24_sext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i24 %y, 1 @@ -490,7 +476,6 @@ ; GFX10-LABEL: v_add_mul_i24_sext_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i24 %x, %y @@ -523,7 +508,6 @@ ; GFX10-LABEL: v_mul_sub_1_i24_sext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -556,7 +540,6 @@ ; GFX10-LABEL: v_mul_add_1_i25_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i25 %y, 1 @@ -589,7 +572,6 @@ ; GFX10-LABEL: v_mul_sub_1_i25_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -622,7 +604,6 @@ ; GFX10-LABEL: v_mul_add_1_i25_sext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i25 %y, 1 @@ -655,7 +636,6 @@ ; GFX10-LABEL: v_mul_sub_1_i25_sext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -689,7 +669,6 @@ ; GFX10-LABEL: v_mul_add_1_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i16 %y, 1 @@ -723,7 +702,6 @@ ; GFX10-LABEL: v_mul_add_1_i16_zext_result: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -758,7 +736,6 @@ ; GFX10-LABEL: v_mul_add_1_i16_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i16 %y, 1 @@ -790,7 +767,6 @@ ; GFX10-LABEL: v_mul_add_x_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %x, %y @@ -825,7 +801,6 @@ ; GFX10-LABEL: v_mul_sub_1_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -861,7 +836,6 @@ ; GFX10-LABEL: v_mul_sub_1_i16_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1 ; GFX10-NEXT: v_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -897,7 +871,6 @@ ; GFX10-LABEL: v_mul_sub_x_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_sub_nc_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -933,7 +906,6 @@ ; GFX10-LABEL: v_mul_add_2_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v1, v1, 2 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -969,7 +941,6 @@ ; GFX10-LABEL: v_mul_sub_2_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v1, v1, -2 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1027,7 +998,6 @@ ; GFX10-LABEL: v_mul_add_1_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2 @@ -1088,7 +1058,6 @@ ; GFX10-LABEL: v_mul_add_1_i64_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2 @@ -1149,7 +1118,6 @@ ; GFX10-LABEL: v_mul_add_x_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2 @@ -1215,7 +1183,6 @@ ; GFX10-LABEL: v_mul_sub_1_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2 @@ -1282,7 +1249,6 @@ ; GFX10-LABEL: v_mul_sub_1_i64_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1 @@ -1347,7 +1313,6 @@ ; GFX10-LABEL: v_mul_sub_x_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2 ; GFX10-NEXT: v_mul_lo_u32 v5, v0, v3 ; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, v0, v2, 0 @@ -1414,7 +1379,6 @@ ; GFX10-LABEL: v_mul_add_2_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2 @@ -1481,7 +1445,6 @@ ; GFX10-LABEL: v_mul_sub_2_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2 @@ -1533,7 +1496,6 @@ ; GFX10-LABEL: v_mul_add_1_i32_multiple: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, v3, v[2:3] @@ -1571,7 +1533,6 @@ ; GFX10-LABEL: v_mul_add_1_i32_other_use: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1619,7 +1580,6 @@ ; GFX10-LABEL: v_mul_add_1_i32_chain: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-NEXT: v_mul_lo_u32 v1, v2, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v2, v1, v2 @@ -1670,7 +1630,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1715,7 +1674,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i16_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1761,7 +1719,6 @@ ; GFX10-LABEL: v_mul_add_x_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1806,7 +1763,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1851,7 +1807,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i16_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1899,7 +1854,6 @@ ; GFX10-LABEL: v_mul_sub_x_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_pk_sub_i16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1944,7 +1898,6 @@ ; GFX10-LABEL: v_mul_add_2_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1989,7 +1942,6 @@ ; GFX10-LABEL: v_mul_sub_2_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2037,7 +1989,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 @@ -2086,7 +2037,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i32_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 @@ -2135,7 +2085,6 @@ ; GFX10-LABEL: v_mul_add_x_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 @@ -2176,7 +2125,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 @@ -2218,7 +2166,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i32_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3 ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0 @@ -2269,7 +2216,6 @@ ; GFX10-LABEL: v_mul_sub_x_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX10-NEXT: v_mul_lo_u32 v3, v1, v3 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0 @@ -2311,7 +2257,6 @@ ; GFX10-LABEL: v_mul_add_2_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 @@ -2353,7 +2298,6 @@ ; GFX10-LABEL: v_mul_sub_2_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 @@ -2389,7 +2333,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2423,7 +2366,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i24_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2457,7 +2399,6 @@ ; GFX10-LABEL: v_mul_add_x_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2497,7 +2438,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 @@ -2539,7 +2479,6 @@ ; GFX10-LABEL: v_mul_sub_1_v2i24_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0 @@ -2581,7 +2520,6 @@ ; GFX10-LABEL: v_mul_sub_x_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_u32_u24_e32 v2, v0, v2 ; GFX10-NEXT: v_mul_u32_u24_e32 v3, v1, v3 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0 @@ -2623,7 +2561,6 @@ ; GFX10-LABEL: v_mul_add_2_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 @@ -2665,7 +2602,6 @@ ; GFX10-LABEL: v_mul_sub_2_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 @@ -2700,7 +2636,6 @@ ; GFX10-LABEL: v_mul_9_add_52_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i32 %arg, 9 @@ -2731,7 +2666,6 @@ ; GFX10-LABEL: v_mul_9_add_52_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, 9, 52 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg, 9 @@ -2774,7 +2708,6 @@ ; GFX10-LABEL: v_mul_9_add_52_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 9 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_add_u16 v0, v0, 52 op_sel_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2832,7 +2765,6 @@ ; GFX10-LABEL: v_mul_9_add_52_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2] @@ -2866,7 +2798,6 @@ ; GFX10-LABEL: v_mul_5_add_1_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i32 %arg, 5 @@ -2902,7 +2833,6 @@ ; GFX10-LABEL: v_mul_284_add_82_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_movk_i32 s4, 0x11c ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2934,7 +2864,6 @@ ; GFX10-LABEL: v_mul_5_add_1_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, 5, 1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg, 5 @@ -2971,7 +2900,6 @@ ; GFX10-LABEL: v_mul_284_add_82_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_movk_i32 s4, 0x11c ; GFX10-NEXT: v_mad_u16 v0, v0, s4, 0x52 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3015,7 +2943,6 @@ ; GFX10-LABEL: v_mul_5_add_1_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3065,7 +2992,6 @@ ; GFX10-LABEL: v_mul_284_add_82_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_mul_lo_u16 v0, 0x11c, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, 0x52, v0 op_sel_hi:[0,1] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3123,7 +3049,6 @@ ; GFX10-LABEL: v_mul_5_add_1_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2] @@ -3197,7 +3122,6 @@ ; GFX10-LABEL: v_mul_284_add_82_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[4:5], 0x52 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x11c, v0, s[4:5] @@ -3272,7 +3196,6 @@ ; GFX10-LABEL: v_mul_934584645_add_8234599_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b64 s[4:5], 0x7da667 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x37b4a145, v0, s[4:5] @@ -3579,7 +3502,6 @@ ; GFX10-LABEL: v_mul_add_1_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i8 %y, 1 @@ -3612,7 +3534,6 @@ ; GFX10-LABEL: v_mul_add_1_i8_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i8 %y, 1 @@ -3644,7 +3565,6 @@ ; GFX10-LABEL: v_mul_add_1_i8_zext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i8 %y, 1 @@ -3676,7 +3596,6 @@ ; GFX10-LABEL: v_mul_add_1_i8_zext_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %add = add i8 %y, 1 @@ -3724,7 +3643,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1 @@ -3776,7 +3694,6 @@ ; GFX10-LABEL: v_mul_add_1_v2i8_commute: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1 ; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0 ; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1 diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll --- a/llvm/test/CodeGen/AMDGPU/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll @@ -40,7 +40,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -71,7 +70,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call float @llvm.roundeven.f32(float %x) @@ -110,7 +108,6 @@ ; GFX10PLUS-LABEL: v_roundeven_v2f32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -146,7 +143,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -190,7 +186,6 @@ ; GFX10PLUS-LABEL: v_roundeven_v3f32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 @@ -231,7 +226,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 @@ -280,7 +274,6 @@ ; GFX10PLUS-LABEL: v_roundeven_v4f32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 @@ -326,7 +319,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 @@ -368,7 +360,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -403,7 +394,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f16: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call half @llvm.roundeven.f16(half %x) @@ -452,7 +442,6 @@ ; GFX10-LABEL: v_roundeven_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rndne_f16_e32 v1, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0 @@ -461,7 +450,6 @@ ; GFX11-LABEL: v_roundeven_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0 ; GFX11-NEXT: v_rndne_f16_e32 v1, v1 @@ -509,7 +497,6 @@ ; SDAG_GFX10-LABEL: v_roundeven_v2f16: ; SDAG_GFX10: ; %bb.0: ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -518,7 +505,6 @@ ; SDAG_GFX11-LABEL: v_roundeven_v2f16: ; SDAG_GFX11: ; %bb.0: ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1 @@ -582,7 +568,6 @@ ; GFX10-LABEL: v_roundeven_v2f16_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX10-NEXT: v_rndne_f16_e32 v1, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 @@ -592,7 +577,6 @@ ; GFX11-LABEL: v_roundeven_v2f16_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0 @@ -649,7 +633,6 @@ ; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg: ; SDAG_GFX10: ; %bb.0: ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; SDAG_GFX10-NEXT: v_rndne_f16_e64 v0, -v0 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -658,7 +641,6 @@ ; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg: ; SDAG_GFX11: ; %bb.0: ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SDAG_GFX11-NEXT: v_rndne_f16_e64 v0, -v0 ; SDAG_GFX11-NEXT: v_rndne_f16_e64 v1, -v1 @@ -729,7 +711,6 @@ ; GFX10-LABEL: v_roundeven_v4f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_rndne_f16_e32 v2, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX10-NEXT: v_rndne_f16_e32 v3, v1 @@ -741,7 +722,6 @@ ; GFX11-LABEL: v_roundeven_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0 @@ -811,7 +791,6 @@ ; SDAG_GFX10-LABEL: v_roundeven_v4f16: ; SDAG_GFX10: ; %bb.0: ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0 @@ -823,7 +802,6 @@ ; SDAG_GFX11-LABEL: v_roundeven_v4f16: ; SDAG_GFX11: ; %bb.0: ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1 @@ -866,7 +844,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f32_fabs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -897,7 +874,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0| ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -987,7 +963,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f32_fneg: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -1018,7 +993,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg float %x @@ -1063,7 +1037,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -1104,7 +1077,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f64: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call double @llvm.roundeven.f64(double %x) @@ -1149,7 +1121,6 @@ ; GFX10PLUS-LABEL: v_roundeven_f64_fneg: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; @@ -1191,7 +1162,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg double %x @@ -1246,7 +1216,6 @@ ; GFX10PLUS-LABEL: v_roundeven_v2f64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -1297,7 +1266,6 @@ ; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64: ; SDAG_GFX10PLUS: ; %bb.0: ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/saddsat.ll b/llvm/test/CodeGen/AMDGPU/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/saddsat.ll @@ -37,7 +37,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp @@ -80,7 +79,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) @@ -121,7 +119,6 @@ ; GFX10PLUS-LABEL: v_saddsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) @@ -180,7 +177,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -252,7 +248,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v3i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -341,7 +336,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -399,7 +393,6 @@ ; GFX10PLUS-LABEL: v_saddsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -453,7 +446,6 @@ ; GFX10-LABEL: v_saddsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3] @@ -468,7 +460,6 @@ ; GFX11-LABEL: v_saddsat_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX11-NEXT: v_cmp_gt_i64_e64 s0, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll b/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll --- a/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll +++ b/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: xori64i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v1 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -19,7 +18,6 @@ ; CHECK-LABEL: selecti64i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v1 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -32,7 +30,6 @@ ; CHECK-LABEL: selecti64i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v1 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -45,7 +42,6 @@ ; CHECK-LABEL: selecti32i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v1 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v1 @@ -61,7 +57,6 @@ ; CHECK-LABEL: xori32i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -75,7 +70,6 @@ ; CHECK-LABEL: selecti32i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -88,7 +82,6 @@ ; CHECK-LABEL: selecti32i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0 ; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -101,7 +94,6 @@ ; CHECK-LABEL: selecti8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 8 ; CHECK-NEXT: v_mov_b32_e32 v1, 0x54 ; CHECK-NEXT: v_ashrrev_i16 v0, 7, v0 @@ -116,7 +108,6 @@ ; CHECK-LABEL: icmpasreq: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -130,7 +121,6 @@ ; CHECK-LABEL: icmpasrne: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, -1, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -144,7 +134,6 @@ ; CHECK-LABEL: oneusecmp: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v0 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll @@ -33,7 +33,6 @@ ; GFX11-LABEL: add_select_fabs_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -77,7 +76,6 @@ ; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_add_f16_e64 v1, |v1|, v3 @@ -124,7 +122,6 @@ ; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 @@ -171,7 +168,6 @@ ; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_add_f16_e64 v1, |v2|, v4 @@ -216,7 +212,6 @@ ; GFX11-LABEL: add_select_fabs_var_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -256,7 +251,6 @@ ; GFX11-LABEL: add_select_fabs_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -295,7 +289,6 @@ ; GFX11-LABEL: add_select_fabs_negk_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -333,7 +326,6 @@ ; GFX11-LABEL: add_select_posk_posk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x4000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -372,7 +364,6 @@ ; GFX11-LABEL: add_select_negk_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -413,7 +404,6 @@ ; GFX11-LABEL: add_select_negliteralk_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -452,7 +442,6 @@ ; GFX11-LABEL: add_select_fabs_posk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -490,7 +479,6 @@ ; GFX11-LABEL: add_select_posk_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -529,7 +517,6 @@ ; GFX11-LABEL: add_select_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -573,7 +560,6 @@ ; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_sub_f16_e32 v1, v4, v1 @@ -620,7 +606,6 @@ ; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 @@ -667,7 +652,6 @@ ; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX11-NEXT: v_sub_f16_e32 v1, v4, v2 @@ -712,7 +696,6 @@ ; GFX11-LABEL: add_select_fneg_var_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -751,7 +734,6 @@ ; GFX11-LABEL: add_select_fneg_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -790,7 +772,6 @@ ; GFX11-LABEL: add_select_fneg_inv2pi_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -829,7 +810,6 @@ ; GFX11-LABEL: add_select_fneg_neginv2pi_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -866,7 +846,6 @@ ; GFX11-LABEL: add_select_negk_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -905,7 +884,6 @@ ; GFX11-LABEL: add_select_negliteralk_negliteralk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0xe800 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -942,7 +920,6 @@ ; GFX11-LABEL: add_select_fneg_negk_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -981,7 +958,6 @@ ; GFX11-LABEL: add_select_negk_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1019,7 +995,6 @@ ; GFX11-LABEL: add_select_fneg_posk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1057,7 +1032,6 @@ ; GFX11-LABEL: add_select_posk_fneg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1098,7 +1072,6 @@ ; GFX11-LABEL: add_select_negfabs_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -1143,7 +1116,6 @@ ; GFX11-LABEL: add_select_fabs_negfabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_or_b32_e32 v2, 0x8000, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -1188,7 +1160,6 @@ ; GFX11-LABEL: add_select_neg_fabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -1232,7 +1203,6 @@ ; GFX11-LABEL: add_select_fabs_neg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -1275,7 +1245,6 @@ ; GFX11-LABEL: add_select_neg_negfabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1318,7 +1287,6 @@ ; GFX11-LABEL: add_select_negfabs_neg_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1360,7 +1328,6 @@ ; GFX11-LABEL: mul_select_negfabs_posk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1401,7 +1368,6 @@ ; GFX11-LABEL: mul_select_posk_negfabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1442,7 +1408,6 @@ ; GFX11-LABEL: mul_select_negfabs_negk_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1483,7 +1448,6 @@ ; GFX11-LABEL: mul_select_negk_negfabs_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1526,7 +1490,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_add_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_add_f16_e32 v1, 4.0, v1 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1556,7 +1519,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_add_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, -4.0, v1 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1593,7 +1555,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_add_f16_e32 v1, -4.0, v1 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1623,7 +1584,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, 4.0, v1 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1659,7 +1619,6 @@ ; GFX11-LABEL: select_fneg_posk_src_mul_f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v1 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1698,7 +1657,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1730,7 +1688,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1770,7 +1727,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1 ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1803,7 +1759,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2 ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll @@ -68,7 +68,6 @@ ; GFX11-LABEL: add_select_fabs_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -160,7 +159,6 @@ ; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -248,7 +246,6 @@ ; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -343,7 +340,6 @@ ; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -427,7 +423,6 @@ ; GFX11-LABEL: add_select_fabs_var_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 @@ -500,7 +495,6 @@ ; GFX11-LABEL: add_select_fabs_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -571,7 +565,6 @@ ; GFX11-LABEL: add_select_fabs_negk_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -636,7 +629,6 @@ ; GFX11-LABEL: add_select_posk_posk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x4000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -706,7 +698,6 @@ ; GFX11-LABEL: add_select_negk_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -779,7 +770,6 @@ ; GFX11-LABEL: add_select_negliteralk_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -851,7 +841,6 @@ ; GFX11-LABEL: add_select_fabs_posk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -923,7 +912,6 @@ ; GFX11-LABEL: add_select_posk_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -997,7 +985,6 @@ ; GFX11-LABEL: add_select_fneg_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 @@ -1082,7 +1069,6 @@ ; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3 @@ -1167,7 +1153,6 @@ ; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3 @@ -1255,7 +1240,6 @@ ; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3 @@ -1340,7 +1324,6 @@ ; GFX11-LABEL: add_select_fneg_var_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 @@ -1411,7 +1394,6 @@ ; GFX11-LABEL: add_select_fneg_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1481,7 +1463,6 @@ ; GFX11-LABEL: add_select_fneg_inv2pi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1551,7 +1532,6 @@ ; GFX11-LABEL: add_select_fneg_neginv2pi_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1615,7 +1595,6 @@ ; GFX11-LABEL: add_select_negk_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1680,7 +1659,6 @@ ; GFX11-LABEL: add_select_negliteralk_negliteralk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xe800 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1743,7 +1721,6 @@ ; GFX11-LABEL: add_select_fneg_negk_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1812,7 +1789,6 @@ ; GFX11-LABEL: add_select_negk_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1881,7 +1857,6 @@ ; GFX11-LABEL: add_select_fneg_posk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -1950,7 +1925,6 @@ ; GFX11-LABEL: add_select_posk_fneg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) @@ -2031,7 +2005,6 @@ ; GFX11-LABEL: add_select_negfabs_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -2118,7 +2091,6 @@ ; GFX11-LABEL: add_select_fabs_negfabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v3, 0x80008000, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -2205,7 +2177,6 @@ ; GFX11-LABEL: add_select_neg_fabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -2291,7 +2262,6 @@ ; GFX11-LABEL: add_select_fabs_neg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -2371,7 +2341,6 @@ ; GFX11-LABEL: add_select_neg_negfabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 @@ -2450,7 +2419,6 @@ ; GFX11-LABEL: add_select_negfabs_neg_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 @@ -2529,7 +2497,6 @@ ; GFX11-LABEL: mul_select_negfabs_posk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2606,7 +2573,6 @@ ; GFX11-LABEL: mul_select_posk_negfabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2683,7 +2649,6 @@ ; GFX11-LABEL: mul_select_negfabs_negk_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2760,7 +2725,6 @@ ; GFX11-LABEL: mul_select_negk_negfabs_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2844,7 +2808,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_add_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -2904,7 +2867,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_add_v2f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2982,7 +2944,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3042,7 +3003,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -3108,7 +3068,6 @@ ; GFX11-LABEL: select_fneg_posk_src_mul_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -3192,7 +3151,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3238,7 +3196,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_v2f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -3324,7 +3281,6 @@ ; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: ; GFX11-SAFE: ; %bb.0: ; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] ; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3391,7 +3347,6 @@ ; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: ; GFX11-NSZ: ; %bb.0: ; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) diff --git a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll --- a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll @@ -11,7 +11,6 @@ ; CHECK-LABEL: f: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1911,35 +1911,21 @@ } define void @skip_mode_switch(i32 %arg) { -; SI-LABEL: skip_mode_switch: -; SI: ; %bb.0: ; %entry -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc -; SI-NEXT: s_cbranch_execz .LBB16_2 -; SI-NEXT: ; %bb.1: ; %bb.0 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 -; SI-NEXT: .LBB16_2: ; %bb.1 -; SI-NEXT: s_or_b64 exec, exec, s[4:5] -; SI-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-WAVE64-LABEL: skip_mode_switch: -; GFX10-WAVE64: ; %bb.0: ; %entry -; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2 -; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0 -; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 -; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1 -; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31] +; WAVE64-LABEL: skip_mode_switch: +; WAVE64: ; %bb.0: ; %entry +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc +; WAVE64-NEXT: s_cbranch_execz .LBB16_2 +; WAVE64-NEXT: ; %bb.1: ; %bb.0 +; WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 +; WAVE64-NEXT: .LBB16_2: ; %bb.1 +; WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] +; WAVE64-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-WAVE32-LABEL: skip_mode_switch: ; GFX10-WAVE32: ; %bb.0: ; %entry ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2 @@ -1952,7 +1938,6 @@ ; GFX11-LABEL: skip_mode_switch: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b64 s[0:1], exec ; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-NEXT: s_cbranch_execz .LBB16_2 diff --git a/llvm/test/CodeGen/AMDGPU/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/ssubsat.ll @@ -37,7 +37,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp @@ -80,7 +79,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) @@ -121,7 +119,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) @@ -180,7 +177,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -252,7 +248,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v3i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -341,7 +336,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -399,7 +393,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -471,7 +464,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp @@ -559,7 +551,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp @@ -708,7 +699,6 @@ ; GFX10PLUS-LABEL: v_ssubsat_v8i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp @@ -987,7 +977,6 @@ ; GFX10-LABEL: v_ssubsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp ; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp @@ -1011,7 +1000,6 @@ ; GFX11-LABEL: v_ssubsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp ; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp @@ -1082,7 +1070,6 @@ ; GFX10-LABEL: v_ssubsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3] @@ -1097,7 +1084,6 @@ ; GFX11-LABEL: v_ssubsat_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll --- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -29,7 +29,6 @@ ; GFX10-LABEL: local_store_i56: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 ; GFX10-NEXT: ds_write_b16 v0, v2 offset:4 ; GFX10-NEXT: ds_write_b32 v0, v1 @@ -39,7 +38,6 @@ ; GFX11-LABEL: local_store_i56: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_store_b8_d16_hi v0, v2 offset:6 ; GFX11-NEXT: ds_store_b16 v0, v2 offset:4 ; GFX11-NEXT: ds_store_b32 v0, v1 @@ -325,7 +323,6 @@ ; GFX10-LABEL: local_store_i13: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0x1fff, v1 ; GFX10-NEXT: ds_write_b16 v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -334,7 +331,6 @@ ; GFX11-LABEL: local_store_i13: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v1, 0x1fff, v1 ; GFX11-NEXT: ds_store_b16 v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -366,7 +362,6 @@ ; GFX10-LABEL: local_store_i17: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v2, 0x1ffff, v1 ; GFX10-NEXT: ds_write_b16 v0, v1 ; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:2 @@ -376,7 +371,6 @@ ; GFX11-LABEL: local_store_i17: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_and_b32_e32 v2, 0x1ffff, v1 ; GFX11-NEXT: ds_store_b16 v0, v1 ; GFX11-NEXT: ds_store_b8_d16_hi v0, v2 offset:2 diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll @@ -15,7 +15,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -32,7 +31,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -49,7 +47,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -74,7 +71,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -99,7 +95,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -124,7 +119,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -151,7 +145,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_v3f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX10PLUS-NEXT: v_add_f16_e32 v1, v1, v3 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -187,7 +180,6 @@ ; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_add_f16_e32 v0, v0, v2 @@ -199,7 +191,6 @@ ; GFX11-LABEL: v_constained_fadd_v4f16_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll @@ -13,7 +13,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -30,7 +29,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -47,7 +45,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -65,7 +62,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -73,7 +69,6 @@ ; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -91,7 +86,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -99,7 +93,6 @@ ; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_ignore: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -117,7 +110,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -125,7 +117,6 @@ ; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -144,7 +135,6 @@ ; GFX10-LABEL: v_constained_fadd_v3f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v4 ; GFX10-NEXT: v_add_f32_e32 v2, v2, v5 @@ -153,7 +143,6 @@ ; GFX11-LABEL: v_constained_fadd_v3f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_add_f32 v0, v0, v3 :: v_dual_add_f32 v1, v1, v4 ; GFX11-NEXT: v_add_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -186,7 +175,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f32_e64 v0, |v0|, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -204,7 +192,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_add_f32_e64 v0, v0, |v1| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) @@ -222,7 +209,6 @@ ; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v1, |v0| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: v_constained_fadd_f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -30,7 +29,6 @@ ; GFX10-LABEL: v_constained_fadd_f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -47,7 +45,6 @@ ; GFX10-LABEL: v_constained_fadd_f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -65,7 +62,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -84,7 +80,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -103,7 +98,6 @@ ; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -123,7 +117,6 @@ ; GFX10-LABEL: v_constained_fadd_v3f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9] ; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll @@ -14,14 +14,12 @@ ; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -50,14 +48,12 @@ ; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -88,7 +84,6 @@ ; GFX10-LABEL: v_constained_fma_v3f16_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-NEXT: v_fma_f16 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -96,7 +91,6 @@ ; GFX11-LABEL: v_constained_fma_v3f16_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX11-NEXT: v_fma_f16 v1, v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -145,7 +139,6 @@ ; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 @@ -163,7 +156,6 @@ ; GFX11-LABEL: v_constained_fma_v4f16_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1 @@ -191,14 +183,12 @@ ; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg half %z @@ -216,14 +206,12 @@ ; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, -v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, -v0, -v1, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg half %x @@ -242,14 +230,12 @@ ; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f16 v0, |v0|, |v1|, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_fma_f16 v0, |v0|, |v1|, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = call half @llvm.fabs.f16(half %x) @@ -280,14 +266,12 @@ ; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0] ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg <2 x half> %x diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -31,7 +30,6 @@ ; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4 ; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -51,7 +49,6 @@ ; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6 ; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7 ; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8 @@ -73,7 +70,6 @@ ; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8 ; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9 ; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10 @@ -93,7 +89,6 @@ ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg float %z @@ -111,7 +106,6 @@ ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, -v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg float %x @@ -130,7 +124,6 @@ ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.x = call float @llvm.fabs.f32(float %x) @@ -150,7 +143,6 @@ ; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f32 v0, -v0, -v2, v4 ; GFX10-NEXT: v_fma_f32 v1, -v1, -v3, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll @@ -13,7 +13,6 @@ ; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -31,7 +30,6 @@ ; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9] ; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -51,7 +49,6 @@ ; GFX10-LABEL: v_constained_fma_v3f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] ; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] ; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] @@ -73,7 +70,6 @@ ; GFX10-LABEL: v_constained_fma_v4f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] ; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] ; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] @@ -93,7 +89,6 @@ ; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg double %z @@ -111,7 +106,6 @@ ; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg double %x @@ -130,7 +124,6 @@ ; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5] ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.x = call double @llvm.fabs.f64(double %x) @@ -150,7 +143,6 @@ ; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9] ; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11] ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll @@ -24,7 +24,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -41,7 +40,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -58,7 +56,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -91,7 +88,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -124,7 +120,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -157,7 +152,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -200,7 +194,6 @@ ; GFX10-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -208,7 +201,6 @@ ; GFX10-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -216,7 +208,6 @@ ; GFX11-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX11-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -224,7 +215,6 @@ ; GFX1-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict: ; GFX1-GISEL: ; %bb.0: ; GFX1-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -278,7 +268,6 @@ ; GFX10-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v2 @@ -290,7 +279,6 @@ ; GFX10-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -298,7 +286,6 @@ ; GFX11-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0 @@ -314,7 +301,6 @@ ; GFX1-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict: ; GFX1-GISEL: ; %bb.0: ; GFX1-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 ; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll @@ -16,7 +16,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -33,7 +32,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -50,7 +48,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -68,7 +65,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -76,7 +72,6 @@ ; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -94,7 +89,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -102,7 +96,6 @@ ; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_ignore: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -120,7 +113,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -128,7 +120,6 @@ ; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -147,7 +138,6 @@ ; GFX10-LABEL: v_constained_fmul_v3f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4 ; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5 @@ -156,7 +146,6 @@ ; GFX11-LABEL: v_constained_fmul_v3f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mul_f32 v0, v0, v3 :: v_dual_mul_f32 v1, v1, v4 ; GFX11-NEXT: v_mul_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -189,7 +178,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -207,7 +195,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e64 v0, v0, |v1| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) @@ -225,7 +212,6 @@ ; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_mul_f32_e64 v0, -|v0|, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll @@ -16,7 +16,6 @@ ; GFX10-LABEL: v_constained_fmul_f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -33,7 +32,6 @@ ; GFX10-LABEL: v_constained_fmul_f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -50,7 +48,6 @@ ; GFX10-LABEL: v_constained_fmul_f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -68,7 +65,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -87,7 +83,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -106,7 +101,6 @@ ; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -126,7 +120,6 @@ ; GFX10-LABEL: v_constained_fmul_v3f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] ; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] ; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll @@ -23,7 +23,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -40,7 +39,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -57,7 +55,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -100,7 +97,6 @@ ; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 @@ -109,14 +105,12 @@ ; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict: ; GFX10PLUS-SDAG: ; %bb.0: ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 @@ -127,7 +121,6 @@ ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict: ; GFX10PLUS-GISEL: ; %bb.0: ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -170,7 +163,6 @@ ; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 @@ -179,14 +171,12 @@ ; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore: ; GFX10PLUS-SDAG: ; %bb.0: ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 @@ -197,7 +187,6 @@ ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore: ; GFX10PLUS-GISEL: ; %bb.0: ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -240,7 +229,6 @@ ; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 @@ -249,14 +237,12 @@ ; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap: ; GFX10PLUS-SDAG: ; %bb.0: ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1 @@ -267,7 +253,6 @@ ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap: ; GFX10PLUS-GISEL: ; %bb.0: ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1] ; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -315,7 +300,6 @@ ; GFX10-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v1, v1, v3 @@ -325,7 +309,6 @@ ; GFX10-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2 ; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3 @@ -336,7 +319,6 @@ ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict: ; GFX10PLUS-SDAG: ; %bb.0: ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0 ; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2 @@ -348,7 +330,6 @@ ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict: ; GFX10PLUS-GISEL: ; %bb.0: ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v0, v0, v2 @@ -411,7 +392,6 @@ ; GFX10-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2 @@ -423,7 +403,6 @@ ; GFX10-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2 ; GFX10-GISEL-NEXT: v_sub_f16_e32 v5, v1, v3 ; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 @@ -437,7 +416,6 @@ ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict: ; GFX10PLUS-SDAG: ; %bb.0: ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0 @@ -453,7 +431,6 @@ ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict: ; GFX10PLUS-GISEL: ; %bb.0: ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2 diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll @@ -18,7 +18,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -35,7 +34,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -52,7 +50,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -70,7 +67,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -78,7 +74,6 @@ ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -96,7 +91,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -104,7 +98,6 @@ ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -122,7 +115,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -130,7 +122,6 @@ ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -149,7 +140,6 @@ ; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v4 ; GFX10-NEXT: v_sub_f32_e32 v2, v2, v5 @@ -158,7 +148,6 @@ ; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4 ; GFX11-NEXT: v_sub_f32_e32 v2, v2, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -191,7 +180,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, |v0|, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -209,7 +197,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v0, |v1| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) @@ -227,7 +214,6 @@ ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, -|v0|, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll @@ -16,7 +16,6 @@ ; GFX10-LABEL: v_constained_fsub_f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") @@ -33,7 +32,6 @@ ; GFX10-LABEL: v_constained_fsub_f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -50,7 +48,6 @@ ; GFX10-LABEL: v_constained_fsub_f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") @@ -68,7 +65,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -87,7 +83,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_ignore: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -106,7 +101,6 @@ ; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -126,7 +120,6 @@ ; GFX10-LABEL: v_constained_fsub_v3f64_fpexcept_strict: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[6:7] ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[8:9] ; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll @@ -37,7 +37,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_f16_i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff @@ -65,7 +64,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_f16_i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v3, v0 @@ -109,7 +107,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff @@ -148,7 +145,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v3, v0 @@ -198,7 +194,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-SDAG-NEXT: v_med3_i32 v0, v4, s0, 0x7fff @@ -244,7 +239,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v4, v0 @@ -302,7 +296,6 @@ ; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_med3_i32 v0, v6, s0, 0x7fff @@ -358,7 +351,6 @@ ; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v3 diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll --- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll @@ -36,7 +36,6 @@ ; GFX11-LABEL: test_ldexp_f32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") @@ -73,7 +72,6 @@ ; GFX11-LABEL: test_ldexp_v2f32_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v4 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -133,7 +131,6 @@ ; GFX11-LABEL: test_ldexp_v3f32_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v5 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v6 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v7 @@ -203,7 +200,6 @@ ; GFX11-LABEL: test_ldexp_v4f32_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v6 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v7 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v8 diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll --- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll @@ -15,30 +15,11 @@ ; } define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #0 { -; GFX6-LABEL: test_ldexp_f64_i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_f64_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_f64_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_f64_i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_f64_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call double @llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") ret double %result } @@ -49,112 +30,37 @@ ; } define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %a, <2 x i32> %b) #0 { -; GFX6-LABEL: test_ldexp_v2f64_v2i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 -; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_v2f64_v2i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 -; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_v2f64_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 -; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_v2f64_v2i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 -; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_v2f64_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6 +; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %result } define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double> %a, <3 x i32> %b) #0 { -; GFX6-LABEL: test_ldexp_v3f64_v3i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 -; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 -; GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_v3f64_v3i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 -; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 -; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_v3f64_v3i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 -; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 -; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_v3f64_v3i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 -; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 -; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_v3f64_v3i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8 +; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9 +; GCN-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <3 x double> %result } define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %a, <4 x i32> %b) #0 { -; GFX6-LABEL: test_ldexp_v4f64_v4i32: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 -; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 -; GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 -; GFX6-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: test_ldexp_v4f64_v4i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 -; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 -; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 -; GFX8-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: test_ldexp_v4f64_v4i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 -; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 -; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 -; GFX9-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: test_ldexp_v4f64_v4i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 -; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 -; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 -; GFX11-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: test_ldexp_v4f64_v4i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10 +; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11 +; GCN-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12 +; GCN-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %result } @@ -169,12 +75,15 @@ attributes #0 = { strictfp } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} +; GFX11: {{.*}} ; GFX11-GISEL: {{.*}} ; GFX11-SDAG: {{.*}} +; GFX6: {{.*}} ; GFX6-GISEL: {{.*}} ; GFX6-SDAG: {{.*}} +; GFX8: {{.*}} ; GFX8-GISEL: {{.*}} ; GFX8-SDAG: {{.*}} +; GFX9: {{.*}} ; GFX9-GISEL: {{.*}} ; GFX9-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll @@ -31,7 +31,6 @@ ; GFX10-LABEL: v_uaddsat_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 @@ -66,7 +65,6 @@ ; GFX10-LABEL: v_uaddsat_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) @@ -97,7 +95,6 @@ ; GFX10-LABEL: v_uaddsat_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) @@ -137,7 +134,6 @@ ; GFX10-LABEL: v_uaddsat_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -185,7 +181,6 @@ ; GFX10-LABEL: v_uaddsat_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp ; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -240,7 +235,6 @@ ; GFX10-LABEL: v_uaddsat_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp ; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -278,7 +272,6 @@ ; GFX10-LABEL: v_uaddsat_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v2 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v3 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -320,7 +313,6 @@ ; GFX10-LABEL: v_uaddsat_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v3 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v4 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v5 clamp @@ -368,7 +360,6 @@ ; GFX10-LABEL: v_uaddsat_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v4 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v5 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v6 clamp @@ -437,7 +428,6 @@ ; GFX10-LABEL: v_uaddsat_v8i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v8 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v9 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v10 clamp @@ -556,7 +546,6 @@ ; GFX10-LABEL: v_uaddsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp ; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp @@ -615,7 +604,6 @@ ; GFX10-LABEL: v_uaddsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2793,7 +2793,6 @@ ; GFX1030-LABEL: v_test_udiv64_mulhi_fold: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-NEXT: s_mov_b32 s4, 0x346d900 ; GFX1030-NEXT: s_add_u32 s4, 0x4237, s4 ; GFX1030-NEXT: s_addc_u32 s5, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll @@ -30,7 +30,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp @@ -64,7 +63,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) @@ -98,7 +96,6 @@ ; GFX10PLUS-LABEL: usubsat_as_bithack_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 @@ -134,7 +131,6 @@ ; GFX10PLUS-LABEL: usubsat_as_bithack2_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 @@ -170,7 +166,6 @@ ; GFX10PLUS-LABEL: usubsat_as_bithack_commute_i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 @@ -202,7 +197,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) @@ -243,7 +237,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v1 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) @@ -290,7 +283,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v3i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -345,7 +337,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v4i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -381,7 +372,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v2i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v2 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v3 clamp ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] @@ -420,7 +410,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v3i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v3 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v5 clamp @@ -464,7 +453,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v4i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v4 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v5 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v6 clamp @@ -525,7 +513,6 @@ ; GFX10PLUS-LABEL: v_usubsat_v8i32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v8 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v9 clamp ; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v10 clamp @@ -628,7 +615,6 @@ ; GFX10-LABEL: v_usubsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp ; GFX10-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp @@ -652,7 +638,6 @@ ; GFX11-LABEL: v_usubsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp ; GFX11-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp @@ -711,7 +696,6 @@ ; GFX10PLUS-LABEL: v_usubsat_i64: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 ; GFX10PLUS-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll --- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll +++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll @@ -40,7 +40,6 @@ ; SDAG-GFX11-LABEL: basic_smax_smin: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -71,7 +70,6 @@ ; GISEL-GFX11-LABEL: basic_smax_smin: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -233,7 +231,6 @@ ; SDAG-GFX11-LABEL: basic_smin_smax: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -264,7 +261,6 @@ ; GISEL-GFX11-LABEL: basic_smin_smax: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -305,7 +301,6 @@ ; SDAG-GFX11-LABEL: basic_smin_smax_combined: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -336,7 +331,6 @@ ; GISEL-GFX11-LABEL: basic_smin_smax_combined: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -376,7 +370,6 @@ ; GFX11-LABEL: vec_smax_smin: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_max_i16 v0, v0, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1] @@ -554,7 +547,6 @@ ; GFX11-LABEL: vec_smin_smax: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_pk_max_i16 v0, v0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -14,7 +14,6 @@ ; GFX10-LABEL: shuffle_v4f16_23uu: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -22,7 +21,6 @@ ; GFX11-LABEL: shuffle_v4f16_23uu: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -47,7 +45,6 @@ ; GFX10-LABEL: shuffle_v4f16_234u: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -59,7 +56,6 @@ ; GFX11-LABEL: shuffle_v4f16_234u: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -81,7 +77,6 @@ ; GFX10-LABEL: shuffle_v4f16_u1u3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -89,7 +84,6 @@ ; GFX11-LABEL: shuffle_v4f16_u1u3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -111,7 +105,6 @@ ; GFX10-LABEL: shuffle_v4f16_u3u1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -120,7 +113,6 @@ ; GFX11-LABEL: shuffle_v4f16_u3u1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 @@ -142,7 +134,6 @@ ; GFX10-LABEL: shuffle_v4f16_u3uu: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -150,7 +141,6 @@ ; GFX11-LABEL: shuffle_v4f16_u3uu: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -175,7 +165,6 @@ ; GFX10-LABEL: shuffle_v4f16_3u6u: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -187,7 +176,6 @@ ; GFX11-LABEL: shuffle_v4f16_3u6u: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -215,7 +203,6 @@ ; GFX10-LABEL: shuffle_v4f16_3uu7: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -227,7 +214,6 @@ ; GFX11-LABEL: shuffle_v4f16_3uu7: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -255,7 +241,6 @@ ; GFX10-LABEL: shuffle_v4f16_35u5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -266,7 +251,6 @@ ; GFX11-LABEL: shuffle_v4f16_35u5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -294,7 +278,6 @@ ; GFX10-LABEL: shuffle_v4f16_357u: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -306,7 +289,6 @@ ; GFX11-LABEL: shuffle_v4f16_357u: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -332,7 +314,6 @@ ; GFX10-LABEL: shuffle_v4f16_0101: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, v0 @@ -341,7 +322,6 @@ ; GFX11-LABEL: shuffle_v4f16_0101: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, v0 @@ -363,7 +343,6 @@ ; GFX10-LABEL: shuffle_v4f16_0123: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -371,7 +350,6 @@ ; GFX11-LABEL: shuffle_v4f16_0123: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -396,7 +374,6 @@ ; GFX10-LABEL: shuffle_v4f16_0145: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -408,7 +385,6 @@ ; GFX11-LABEL: shuffle_v4f16_0145: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -434,7 +410,6 @@ ; GFX10-LABEL: shuffle_v4f16_0167: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -446,7 +421,6 @@ ; GFX11-LABEL: shuffle_v4f16_0167: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -469,7 +443,6 @@ ; GFX10-LABEL: shuffle_v4f16_2301: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -478,7 +451,6 @@ ; GFX11-LABEL: shuffle_v4f16_2301: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 @@ -501,7 +473,6 @@ ; GFX10-LABEL: shuffle_v4f16_2323: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, v0 @@ -510,7 +481,6 @@ ; GFX11-LABEL: shuffle_v4f16_2323: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, v0 @@ -536,7 +506,6 @@ ; GFX10-LABEL: shuffle_v4f16_2345: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -548,7 +517,6 @@ ; GFX11-LABEL: shuffle_v4f16_2345: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -574,7 +542,6 @@ ; GFX10-LABEL: shuffle_v4f16_2367: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -586,7 +553,6 @@ ; GFX11-LABEL: shuffle_v4f16_2367: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -612,7 +578,6 @@ ; GFX10-LABEL: shuffle_v4f16_4501: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v5, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -624,7 +589,6 @@ ; GFX11-LABEL: shuffle_v4f16_4501: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v2, v[2:3], off ; GFX11-NEXT: global_load_b32 v1, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -652,7 +616,6 @@ ; GFX10-LABEL: shuffle_v4f16_4523: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -664,7 +627,6 @@ ; GFX11-LABEL: shuffle_v4f16_4523: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v2, v[2:3], off ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -689,7 +651,6 @@ ; GFX10-LABEL: shuffle_v4f16_4545: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, v0 @@ -698,7 +659,6 @@ ; GFX11-LABEL: shuffle_v4f16_4545: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, v0 @@ -720,7 +680,6 @@ ; GFX10-LABEL: shuffle_v4f16_4567: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -728,7 +687,6 @@ ; GFX11-LABEL: shuffle_v4f16_4567: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -753,7 +711,6 @@ ; GFX10-LABEL: shuffle_v4f16_6701: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 ; GFX10-NEXT: global_load_dword v5, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -765,7 +722,6 @@ ; GFX11-LABEL: shuffle_v4f16_6701: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -793,7 +749,6 @@ ; GFX10-LABEL: shuffle_v4f16_6723: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -805,7 +760,6 @@ ; GFX11-LABEL: shuffle_v4f16_6723: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -830,7 +784,6 @@ ; GFX10-LABEL: shuffle_v4f16_6745: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[1:2], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v2 @@ -839,7 +792,6 @@ ; GFX11-LABEL: shuffle_v4f16_6745: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 @@ -862,7 +814,6 @@ ; GFX10-LABEL: shuffle_v4f16_6767: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, v0 @@ -871,7 +822,6 @@ ; GFX11-LABEL: shuffle_v4f16_6767: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, v0 @@ -897,7 +847,6 @@ ; GFX10-LABEL: shuffle_v4f16_2356: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -909,7 +858,6 @@ ; GFX11-LABEL: shuffle_v4f16_2356: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -937,7 +885,6 @@ ; GFX10-LABEL: shuffle_v4f16_5623: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -949,7 +896,6 @@ ; GFX11-LABEL: shuffle_v4f16_5623: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -977,7 +923,6 @@ ; GFX10-LABEL: shuffle_v4f16_3456: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -989,7 +934,6 @@ ; GFX11-LABEL: shuffle_v4f16_3456: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1018,7 +962,6 @@ ; GFX10-LABEL: shuffle_v4f16_5634: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1030,7 +973,6 @@ ; GFX11-LABEL: shuffle_v4f16_5634: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1060,7 +1002,6 @@ ; GFX10-LABEL: shuffle_v4f16_5734: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1072,7 +1013,6 @@ ; GFX11-LABEL: shuffle_v4f16_5734: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1101,7 +1041,6 @@ ; GFX10-LABEL: shuffle_v4i16_2356: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1113,7 +1052,6 @@ ; GFX11-LABEL: shuffle_v4i16_2356: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1141,7 +1079,6 @@ ; GFX10-LABEL: shuffle_v4i16_0167: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1153,7 +1090,6 @@ ; GFX11-LABEL: shuffle_v4i16_0167: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1178,7 +1114,6 @@ ; GFX10-LABEL: shuffle_v4f16_0000: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040100 @@ -1188,7 +1123,6 @@ ; GFX11-LABEL: shuffle_v4f16_0000: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v0, v0, v0, 0x5040100 @@ -1214,7 +1148,6 @@ ; GFX10-LABEL: shuffle_v4f16_1010: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 @@ -1224,7 +1157,6 @@ ; GFX11-LABEL: shuffle_v4f16_1010: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_alignbit_b32 v0, v0, v0, 16 @@ -1252,7 +1184,6 @@ ; GFX10-LABEL: shuffle_v4f16_1100: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v0, v1, v1, 0x7060302 @@ -1262,7 +1193,6 @@ ; GFX11-LABEL: shuffle_v4f16_1100: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v0, v1, v1, 0x7060302 @@ -1289,7 +1219,6 @@ ; GFX10-LABEL: shuffle_v4f16_6161: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1300,7 +1229,6 @@ ; GFX11-LABEL: shuffle_v4f16_6161: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1327,7 +1255,6 @@ ; GFX10-LABEL: shuffle_v4f16_2333: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v1, v0, v0, 0x7060302 @@ -1336,7 +1263,6 @@ ; GFX11-LABEL: shuffle_v4f16_2333: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v1, v0, v0, 0x7060302 @@ -1360,7 +1286,6 @@ ; GFX10-LABEL: shuffle_v4f16_6667: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v1, v0, v0, 0x7060302 @@ -1369,7 +1294,6 @@ ; GFX11-LABEL: shuffle_v4f16_6667: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v1, v0, v0, 0x7060302 @@ -1392,7 +1316,6 @@ ; GFX10-LABEL: shuffle_v8f16_0101: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, v0 @@ -1401,7 +1324,6 @@ ; GFX11-LABEL: shuffle_v8f16_0101: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, v0 @@ -1423,7 +1345,6 @@ ; GFX10-LABEL: shuffle_v8f16_0123: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1431,7 +1352,6 @@ ; GFX11-LABEL: shuffle_v8f16_0123: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1456,7 +1376,6 @@ ; GFX10-LABEL: shuffle_v8f16_4589: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:8 ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1468,7 +1387,6 @@ ; GFX11-LABEL: shuffle_v8f16_4589: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:8 ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1494,7 +1412,6 @@ ; GFX10-LABEL: shuffle_v8f16_10_11_2_3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1506,7 +1423,6 @@ ; GFX11-LABEL: shuffle_v8f16_10_11_2_3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4 ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1534,7 +1450,6 @@ ; GFX10-LABEL: shuffle_v8f16_13_14_2_3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off offset:8 ; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -1546,7 +1461,6 @@ ; GFX11-LABEL: shuffle_v8f16_13_14_2_3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off offset:8 ; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(1) @@ -1572,7 +1486,6 @@ ; GFX10-LABEL: shuffle_v3f16_0122: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_perm_b32 v1, v1, v1, 0x5040100 @@ -1581,7 +1494,6 @@ ; GFX11-LABEL: shuffle_v3f16_0122: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v1, v1, v1, 0x5040100 @@ -1604,7 +1516,6 @@ ; GFX10-LABEL: shuffle_v2f16_0122: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_alignbit_b32 v1, v0, v0, 16 @@ -1613,7 +1524,6 @@ ; GFX11-LABEL: shuffle_v2f16_0122: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_alignbit_b32 v1, v0, v0, 16 @@ -1643,7 +1553,6 @@ ; GFX10-LABEL: shuffle_v6f16_452367: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_mov_b32_e32 v4, v3 @@ -1659,7 +1568,6 @@ ; GFX11-LABEL: shuffle_v6f16_452367: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2 ; GFX11-NEXT: global_load_b96 v[0:2], v[0:1], off ; GFX11-NEXT: global_load_b32 v3, v[3:4], off @@ -1777,7 +1685,6 @@ ; GFX10-LABEL: shuffle_v4f16_0456: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX10-NEXT: ; kill: killed $vgpr0 killed $vgpr1 @@ -1790,7 +1697,6 @@ ; GFX11-LABEL: shuffle_v4f16_0456: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1868,7 +1774,6 @@ ; GFX10-LABEL: low16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1878,7 +1783,6 @@ ; GFX11-LABEL: low16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1906,7 +1810,6 @@ ; GFX10-LABEL: hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1916,7 +1819,6 @@ ; GFX11-LABEL: hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1944,7 +1846,6 @@ ; GFX10-LABEL: low16hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1954,7 +1855,6 @@ ; GFX11-LABEL: low16hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1981,7 +1881,6 @@ ; GFX10-LABEL: hi16low16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1991,7 +1890,6 @@ ; GFX11-LABEL: hi16low16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2019,7 +1917,6 @@ ; GFX10-LABEL: i16_low16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -2029,7 +1926,6 @@ ; GFX11-LABEL: i16_low16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2057,7 +1953,6 @@ ; GFX10-LABEL: i16_low16hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -2067,7 +1962,6 @@ ; GFX11-LABEL: i16_low16hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2094,7 +1988,6 @@ ; GFX10-LABEL: i16_hi16low16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -2104,7 +1997,6 @@ ; GFX11-LABEL: i16_hi16low16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2132,7 +2024,6 @@ ; GFX10-LABEL: i16_hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v4, v[0:1], off ; GFX10-NEXT: global_load_dword v5, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -2142,7 +2033,6 @@ ; GFX11-LABEL: i16_hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2168,7 +2058,6 @@ ; GFX10-LABEL: v2i16_hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 @@ -2177,7 +2066,6 @@ ; GFX11-LABEL: v2i16_hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 @@ -2202,7 +2090,6 @@ ; GFX10-LABEL: v2half_hi16bits: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 @@ -2211,7 +2098,6 @@ ; GFX11-LABEL: v2half_hi16bits: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 @@ -2238,23 +2124,19 @@ ; GFX10-LABEL: shuffle_v8f16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v8f16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 @@ -2279,27 +2161,23 @@ ; GFX10-LABEL: shuffle_v16f16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v16f16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <8 x half>, ptr addrspace(1) %arg0 %val1 = load <8 x half>, ptr addrspace(1) %arg1 @@ -2330,7 +2208,6 @@ ; GFX10-LABEL: shuffle_v32f16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 @@ -2345,13 +2222,11 @@ ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v32f16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16 @@ -2366,7 +2241,6 @@ ; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <16 x half>, ptr addrspace(1) %arg0 %val1 = load <16 x half>, ptr addrspace(1) %arg1 @@ -2389,23 +2263,19 @@ ; GFX10-LABEL: shuffle_v8i16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v8i16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x i16>, ptr addrspace(1) %arg0 %val1 = load <4 x i16>, ptr addrspace(1) %arg1 @@ -2430,27 +2300,23 @@ ; GFX10-LABEL: shuffle_v16i16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v16i16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <8 x i16>, ptr addrspace(1) %arg0 %val1 = load <8 x i16>, ptr addrspace(1) %arg1 @@ -2481,7 +2347,6 @@ ; GFX10-LABEL: shuffle_v32i16_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 @@ -2496,13 +2361,11 @@ ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v32i16_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16 @@ -2517,7 +2380,6 @@ ; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <16 x i16>, ptr addrspace(1) %arg0 %val1 = load <16 x i16>, ptr addrspace(1) %arg1 @@ -2541,23 +2403,19 @@ ; GFX10-LABEL: shuffle_v4i8_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: global_load_short_d16_hi v0, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dword v[4:5], v0, off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v4i8_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v[4:5], v0, off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <2 x i8>, ptr addrspace(1) %arg0 %val1 = load <2 x i8>, ptr addrspace(1) %arg1 @@ -2580,23 +2438,19 @@ ; GFX10-LABEL: shuffle_v8i8_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v6, v[0:1], off ; GFX10-NEXT: global_load_dword v7, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx2 v[4:5], v[6:7], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v8i8_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: global_load_b32 v1, v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x i8>, ptr addrspace(1) %arg0 %val1 = load <4 x i8>, ptr addrspace(1) %arg1 @@ -2619,23 +2473,19 @@ ; GFX10-LABEL: shuffle_v16i8_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v16i8_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <8 x i8>, ptr addrspace(1) %arg0 %val1 = load <8 x i8>, ptr addrspace(1) %arg1 @@ -2660,27 +2510,23 @@ ; GFX10-LABEL: shuffle_v32i8_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v32i8_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <16 x i8>, ptr addrspace(1) %arg0 %val1 = load <16 x i8>, ptr addrspace(1) %arg1 @@ -2703,23 +2549,19 @@ ; GFX10-LABEL: shuffle_v4i32_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v4i32_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <2 x i32>, ptr addrspace(1) %arg0 %val1 = load <2 x i32>, ptr addrspace(1) %arg1 @@ -2744,27 +2586,23 @@ ; GFX10-LABEL: shuffle_v8i32_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v8i32_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x i32>, ptr addrspace(1) %arg0 %val1 = load <4 x i32>, ptr addrspace(1) %arg1 @@ -2795,7 +2633,6 @@ ; GFX10-LABEL: shuffle_v16i32_concat: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off ; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 @@ -2810,13 +2647,11 @@ ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: shuffle_v16i32_concat: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off ; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16 @@ -2831,7 +2666,6 @@ ; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16 -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <8 x i32>, ptr addrspace(1) %arg0 %val1 = load <8 x i32>, ptr addrspace(1) %arg1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -6,7 +6,6 @@ ; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: v_add_co_u32 v6, vcc_lo, v0, 8 ; GCN-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo ; GCN-NEXT: .LBB0_1: ; %bb0 @@ -37,13 +36,11 @@ ; GCN-NEXT: s_mov_b32 vcc_lo, exec_lo ; GCN-NEXT: s_cbranch_vccnz .LBB0_1 ; GCN-NEXT: ; %bb.4: ; %DummyReturnBlock -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: vgpr_descriptor_waterfall_loop_idom_update: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB0_1: ; %bb0 ; GFX11-NEXT: ; =>This Loop Header: Depth=1 @@ -73,7 +70,6 @@ ; GFX11-NEXT: s_mov_b32 vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_vccnz .LBB0_1 ; GFX11-NEXT: ; %bb.4: ; %DummyReturnBlock -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: br label %bb0 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -68,7 +68,6 @@ ; GFX10-LABEL: non_preserved_vgpr_tuple8: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -130,7 +129,6 @@ ; GFX11-LABEL: non_preserved_vgpr_tuple8: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -263,7 +261,6 @@ ; GFX10-LABEL: call_preserved_vgpr_tuple8: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -318,7 +315,6 @@ ; GFX11-LABEL: call_preserved_vgpr_tuple8: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir @@ -7,7 +7,6 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-inorder ; GCN: S_WAITCNT 0 - ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 @@ -21,7 +20,6 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-vmem ; GCN: S_WAITCNT 0 - ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec @@ -36,7 +34,6 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-mimg-samp ; GCN: S_WAITCNT 0 - ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) @@ -51,7 +48,6 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse ; GCN: S_WAITCNT 0 - ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) @@ -66,7 +62,6 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-mimg-samp-reverse ; GCN: S_WAITCNT 0 - ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir @@ -52,7 +52,6 @@ ; GFX10: liveins: $vgpr99 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec ; GFX10-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec ; GFX10-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec @@ -84,7 +83,6 @@ ; GFX11: liveins: $vgpr99 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec ; GFX11-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec ; GFX11-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec @@ -225,7 +223,6 @@ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec @@ -303,7 +300,6 @@ ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec ; GFX11-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec ; GFX11-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec @@ -476,7 +472,6 @@ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec @@ -491,7 +486,6 @@ ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ; GFX11-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec ; GFX11-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -12,7 +12,6 @@ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: S_BARRIER @@ -24,7 +23,6 @@ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: S_BARRIER @@ -50,7 +48,6 @@ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1 ; GFX10-NEXT: S_BARRIER @@ -62,7 +59,6 @@ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: S_BARRIER @@ -88,7 +84,6 @@ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -101,7 +96,6 @@ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX11-NEXT: S_WAITCNT 112 ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -129,7 +123,6 @@ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: S_BARRIER @@ -141,7 +134,6 @@ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: S_BARRIER @@ -169,7 +161,6 @@ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 - ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT 0 ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1 @@ -182,7 +173,6 @@ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 - ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX11-NEXT: S_WAITCNT 0 ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll @@ -222,7 +222,6 @@ ; GCN-LABEL: store_vscnt_private: ; GCN: {{buffer|scratch}}_store_{{dword|b32}} ; GFX8_9: s_waitcnt vmcnt(0) -; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 define void @store_vscnt_private(ptr addrspace(5) %p) { store i32 0, ptr addrspace(5) %p @@ -233,7 +232,6 @@ ; GFX8: flat_store_dword ; GFX9PLUS: global_store_{{dword|b32}} ; GFX8_9: s_waitcnt vmcnt(0) -; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 define void @store_vscnt_global(ptr addrspace(1) %p) { store i32 0, ptr addrspace(1) %p @@ -244,7 +242,6 @@ ; GCN: flat_store_{{dword|b32}} ; GFX8_9: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}} -; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 define void @store_vscnt_flat(ptr %p) { store i32 0, ptr %p @@ -253,7 +250,6 @@ ; GCN-LABEL: function_prologue: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0){{$}} -; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 define void @function_prologue() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1148,14 +1148,12 @@ ; GFX1032-LABEL: test_mad_i64_i32: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: v_mad_i64_i32 v[0:1], s4, v0, v1, v[2:3] ; GFX1032-NEXT: s_setpc_b64 s[30:31] ; ; GFX1064-LABEL: test_mad_i64_i32: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX1064-NEXT: s_setpc_b64 s[30:31] %sext0 = sext i32 %arg0 to i64 @@ -1169,14 +1167,12 @@ ; GFX1032-LABEL: test_mad_u64_u32: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s4, v0, v1, v[2:3] ; GFX1032-NEXT: s_setpc_b64 s[30:31] ; ; GFX1064-LABEL: test_mad_u64_u32: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX1064-NEXT: s_setpc_b64 s[30:31] %sext0 = zext i32 %arg0 to i64 @@ -2858,7 +2854,6 @@ ; GFX1032-LABEL: callee_no_stack_with_call: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: s_mov_b32 s16, s33 ; GFX1032-NEXT: s_mov_b32 s33, s32 ; GFX1032-NEXT: s_or_saveexec_b32 s17, -1 @@ -2893,7 +2888,6 @@ ; GFX1064-LABEL: callee_no_stack_with_call: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: s_mov_b32 s16, s33 ; GFX1064-NEXT: s_mov_b32 s33, s32 ; GFX1064-NEXT: s_or_saveexec_b64 s[18:19], -1 diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -19,7 +19,6 @@ ; GFX10-W32-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0) @@ -89,7 +88,6 @@ ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v0, s[0:3], 0 idxen -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 @@ -121,7 +119,6 @@ ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v0, s[0:3], 0 idxen -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 @@ -208,7 +205,6 @@ ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %c.1 = mul i32 %c, %d @@ -247,7 +243,6 @@ ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %c.1 = mul i32 %c, %d @@ -669,7 +664,6 @@ ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 ; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -1137,7 +1131,6 @@ ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 ; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -1419,7 +1412,6 @@ ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %cmp = icmp eq i32 %z, 0 @@ -1498,7 +1490,6 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %cmp = icmp eq i32 %z, 0 @@ -1581,7 +1572,6 @@ ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %idx.1 = extractelement <3 x i32> %idx, i32 0 @@ -1671,7 +1661,6 @@ ; GFX10-W32-NEXT: s_cbranch_execnz .LBB30_4 ; GFX10-W32-NEXT: .LBB30_2: ; %END ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: s_branch .LBB30_5 ; GFX10-W32-NEXT: .LBB30_3: ; %ELSE ; GFX10-W32-NEXT: v_mul_f32_e32 v0, 4.0, v1 @@ -1681,7 +1670,6 @@ ; GFX10-W32-NEXT: .LBB30_4: ; %IF ; GFX10-W32-NEXT: v_mul_f32_e32 v0, 0x40400000, v1 ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: s_branch .LBB30_5 ; GFX10-W32-NEXT: .LBB30_5: main_body: @@ -1753,7 +1741,6 @@ ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %cond = icmp eq i32 %y, 0 @@ -1830,7 +1817,6 @@ ; GFX10-W32-NEXT: v_add_f32_e32 v2, v9, v13 ; GFX10-W32-NEXT: v_mov_b32_e32 v1, v4 ; GFX10-W32-NEXT: v_mov_b32_e32 v3, v5 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: s_branch .LBB32_3 ; GFX10-W32-NEXT: .LBB32_2: ; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0 @@ -1900,7 +1886,6 @@ ; GFX10-W32-NEXT: ; %bb.1: ; %main_body ; GFX10-W32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: s_branch .LBB33_3 ; GFX10-W32-NEXT: .LBB33_2: ; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0 @@ -2013,7 +1998,6 @@ ; GFX10-W32-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-W32-NEXT: v_mov_b32_e32 v2, v6 ; GFX10-W32-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog entry: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0) @@ -2243,7 +2227,6 @@ ; GFX10-W32-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-W32-NEXT: buffer_store_dword v5, v4, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %cc = icmp sgt i32 %sel, 0 @@ -2594,7 +2577,6 @@ ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 ; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -3056,7 +3038,6 @@ ; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[20:23], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[20:23], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx1, i32 0, i32 0, i32 0) @@ -3168,7 +3149,6 @@ ; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[16:19], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[16:19], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx0, i32 0, i32 0, i32 0) @@ -3271,7 +3251,6 @@ ; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[16:19], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[16:19], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx0, i32 0, i32 0, i32 0)