diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp --- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp @@ -63,7 +63,7 @@ }() && "Expected FirstMI to precede SecondMI"); // Cannot pair dependent instructions for (const auto &Use : SecondMI.uses()) - if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg())) + if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI)) return false; auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -1293,19 +1293,18 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_lshlrev_b32 v2, 4, v2 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff ; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v0 -; GFX11-NEXT: v_not_b32_e32 v3, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v4 +; GFX11-NEXT: v_not_b32_e32 v3, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -1428,8 +1427,8 @@ ; GFX11-NEXT: v_and_or_b32 v4, v4, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -4511,21 +4510,21 @@ ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off ; GFX11-NEXT: global_load_b128 v[7:10], v[0:1], off offset:16 -; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX11-NEXT: v_dual_mov_b32 v11, 16 :: v_dual_and_b32 v0, 0xffff, v2 ; GFX11-NEXT: s_and_b32 s0, s2, 1 ; GFX11-NEXT: s_lshr_b32 m0, s2, 1 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v11, 16 :: v_dual_lshlrev_b32 v0, s0, v0 -; GFX11-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX11-NEXT: v_mov_b32_e32 v12, 0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s0, v0 +; GFX11-NEXT: s_lshl_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_not_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_movrels_b32_e32 v1, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_and_or_b32 v2, v1, s0, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_movreld_b32_e32 v3, v2 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -2177,19 +2177,18 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_lshlrev_b32 v2, 3, v2 ; GFX11-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff ; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v0 -; GFX11-NEXT: v_not_b32_e32 v3, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v4 +; GFX11-NEXT: v_not_b32_e32 v3, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -2314,8 +2313,8 @@ ; GFX11-NEXT: v_and_or_b32 v4, v4, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll @@ -98,19 +98,19 @@ ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v8, v7 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_mov_b32_e32 v10, v7 -; GFX11-NEXT: v_mov_b32_e32 v8, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v4, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] @@ -186,19 +186,19 @@ ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v8, v7 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_mov_b32_e32 v10, v7 -; GFX11-NEXT: v_mov_b32_e32 v8, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v4, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll @@ -101,10 +101,10 @@ ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_mov_b32_e32 v10, v9 ; GFX11-NEXT: v_mov_b32_e32 v11, v9 -; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; GFX11-NEXT: v_mov_b32_e32 v12, v9 -; GFX11-NEXT: v_mov_b32_e32 v10, v9 +; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 s2, s4 @@ -113,9 +113,9 @@ ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v9 +; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 -; GFX11-NEXT: v_dual_mov_b32 v1, v10 :: v_dual_mov_b32 v4, v13 +; GFX11-NEXT: v_mov_b32_e32 v4, v13 ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[10:11] @@ -194,10 +194,10 @@ ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_mov_b32_e32 v10, v9 ; GFX11-NEXT: v_mov_b32_e32 v11, v9 -; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; GFX11-NEXT: v_mov_b32_e32 v12, v9 -; GFX11-NEXT: v_mov_b32_e32 v10, v9 +; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 s2, s4 @@ -206,9 +206,9 @@ ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v9 +; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 -; GFX11-NEXT: v_dual_mov_b32 v1, v10 :: v_dual_mov_b32 v4, v13 +; GFX11-NEXT: v_mov_b32_e32 v4, v13 ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll @@ -106,17 +106,17 @@ ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 -; GFX11-NEXT: v_mov_b32_e32 v10, v7 ; GFX11-NEXT: v_mov_b32_e32 v8, v7 +; GFX11-NEXT: v_mov_b32_e32 v10, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_lshl_or_b32 v5, v1, 16, v0 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v2, v9 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 +; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 ; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -197,17 +197,17 @@ ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 -; GFX11-NEXT: v_mov_b32_e32 v10, v7 ; GFX11-NEXT: v_mov_b32_e32 v8, v7 +; GFX11-NEXT: v_mov_b32_e32 v10, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_lshl_or_b32 v5, v1, 16, v0 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v2, v9 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 +; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 ; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll @@ -100,19 +100,19 @@ ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[10:11] @@ -190,19 +190,19 @@ ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll @@ -525,8 +525,8 @@ ; GFX11-NEXT: ; %bb.1: ; %else ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v2, v0, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_add_nc_u32 v3, v3, v1 -; GFX11-NEXT: v_mov_b32_e32 v1, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v1 +; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX11-NEXT: ; %bb.2: ; %Flow ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-NEXT: ; %bb.3: ; %if diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll @@ -125,13 +125,13 @@ ; GFX11-LABEL: load_1d_tfe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 +; GFX11-NEXT: v_mov_b32_e32 v7, v6 +; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; GFX11-NEXT: v_mov_b32_e32 v9, v6 ; GFX11-NEXT: v_mov_b32_e32 v10, v6 -; GFX11-NEXT: v_mov_b32_e32 v8, v6 -; GFX11-NEXT: v_mov_b32_e32 v7, v6 -; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7 -; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10 +; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 +; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 +; GFX11-NEXT: v_mov_b32_e32 v4, v10 ; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[8:9] @@ -229,13 +229,13 @@ ; GFX11-LABEL: load_1d_lwe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 +; GFX11-NEXT: v_mov_b32_e32 v7, v6 +; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; GFX11-NEXT: v_mov_b32_e32 v9, v6 ; GFX11-NEXT: v_mov_b32_e32 v10, v6 -; GFX11-NEXT: v_mov_b32_e32 v8, v6 -; GFX11-NEXT: v_mov_b32_e32 v7, v6 -; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7 -; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10 +; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 +; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 +; GFX11-NEXT: v_mov_b32_e32 v4, v10 ; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v6, v4, s[8:9] @@ -372,14 +372,13 @@ ; GFX11-LABEL: load_2d_tfe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, v1 -; GFX11-NEXT: v_mov_b32_e32 v5, v0 +; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, v7 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_mov_b32_e32 v10, v7 -; GFX11-NEXT: v_mov_b32_e32 v8, v7 -; GFX11-NEXT: v_mov_b32_e32 v0, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v4, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] @@ -521,13 +520,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] @@ -669,13 +668,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] @@ -812,14 +811,13 @@ ; GFX11-LABEL: load_1darray_tfe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, v1 -; GFX11-NEXT: v_mov_b32_e32 v5, v0 +; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, v7 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_mov_b32_e32 v10, v7 -; GFX11-NEXT: v_mov_b32_e32 v8, v7 -; GFX11-NEXT: v_mov_b32_e32 v0, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v4, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] @@ -961,13 +959,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] @@ -1109,13 +1107,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] @@ -1261,14 +1259,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, v3 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 -; GFX11-NEXT: v_mov_b32_e32 v5, v0 +; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v10, v9 ; GFX11-NEXT: v_mov_b32_e32 v11, v9 -; GFX11-NEXT: v_mov_b32_e32 v13, v9 ; GFX11-NEXT: v_mov_b32_e32 v12, v9 -; GFX11-NEXT: v_mov_b32_e32 v10, v9 -; GFX11-NEXT: v_mov_b32_e32 v0, v9 +; GFX11-NEXT: v_mov_b32_e32 v13, v9 +; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 -; GFX11-NEXT: v_dual_mov_b32 v1, v10 :: v_dual_mov_b32 v4, v13 +; GFX11-NEXT: v_mov_b32_e32 v4, v13 ; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v9, v4, s[8:9] @@ -1405,14 +1402,13 @@ ; GFX11-LABEL: load_mip_1d_lwe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, v1 -; GFX11-NEXT: v_mov_b32_e32 v5, v0 +; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, v7 ; GFX11-NEXT: v_mov_b32_e32 v9, v7 -; GFX11-NEXT: v_mov_b32_e32 v11, v7 ; GFX11-NEXT: v_mov_b32_e32 v10, v7 -; GFX11-NEXT: v_mov_b32_e32 v8, v7 -; GFX11-NEXT: v_mov_b32_e32 v0, v7 +; GFX11-NEXT: v_mov_b32_e32 v11, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10 -; GFX11-NEXT: v_dual_mov_b32 v1, v8 :: v_dual_mov_b32 v4, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v11 ; GFX11-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v7, v4, s[8:9] @@ -1554,13 +1550,13 @@ ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v6, v1 +; GFX11-NEXT: v_mov_b32_e32 v9, v8 +; GFX11-NEXT: v_mov_b32_e32 v10, v8 ; GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GFX11-NEXT: v_mov_b32_e32 v12, v8 -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mov_b32_e32 v9, v8 -; GFX11-NEXT: v_mov_b32_e32 v0, v8 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v3, v11 :: v_dual_mov_b32 v4, v12 +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 +; GFX11-NEXT: v_mov_b32_e32 v4, v12 ; GFX11-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v8, v4, s[8:9] @@ -1958,11 +1954,11 @@ ; GFX11-LABEL: load_1d_tfe_V4_dmask3: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, 0 +; GFX11-NEXT: v_mov_b32_e32 v6, v5 ; GFX11-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-NEXT: v_mov_b32_e32 v8, v5 -; GFX11-NEXT: v_mov_b32_e32 v6, v5 -; GFX11-NEXT: v_dual_mov_b32 v0, v5 :: v_dual_mov_b32 v3, v8 -; GFX11-NEXT: v_dual_mov_b32 v1, v6 :: v_dual_mov_b32 v2, v7 +; GFX11-NEXT: v_dual_mov_b32 v0, v5 :: v_dual_mov_b32 v1, v6 +; GFX11-NEXT: v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8 ; GFX11-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v5, v3, s[8:9] @@ -2048,10 +2044,10 @@ ; GFX11-LABEL: load_1d_tfe_V4_dmask2: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, 0 -; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v4 -; GFX11-NEXT: v_mov_b32_e32 v0, v4 -; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v1, v5 +; GFX11-NEXT: v_mov_b32_e32 v6, v4 +; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 +; GFX11-NEXT: v_mov_b32_e32 v2, v6 ; GFX11-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b32 v4, v2, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -105,13 +105,13 @@ ; GFX11-NEXT: s_mov_b32 s14, exec_lo ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 +; GFX11-NEXT: v_mov_b32_e32 v7, v6 +; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; GFX11-NEXT: v_mov_b32_e32 v9, v6 ; GFX11-NEXT: v_mov_b32_e32 v10, v6 -; GFX11-NEXT: v_mov_b32_e32 v8, v6 -; GFX11-NEXT: v_mov_b32_e32 v7, v6 -; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7 -; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10 +; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 +; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 +; GFX11-NEXT: v_mov_b32_e32 v4, v10 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -568,13 +568,13 @@ ; GFX11-NEXT: s_mov_b32 s14, exec_lo ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 +; GFX11-NEXT: v_mov_b32_e32 v7, v6 +; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; GFX11-NEXT: v_mov_b32_e32 v9, v6 ; GFX11-NEXT: v_mov_b32_e32 v10, v6 -; GFX11-NEXT: v_mov_b32_e32 v8, v6 -; GFX11-NEXT: v_mov_b32_e32 v7, v6 -; GFX11-NEXT: v_mov_b32_e32 v0, v6 -; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7 -; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10 +; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 +; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 +; GFX11-NEXT: v_mov_b32_e32 v4, v10 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; GFX11-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir --- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -19,6 +19,7 @@ define void @vopd_schedule_unconstrained_2() { ret void } define void @vopd_mov_fixup() { ret void } define void @vopd_mov_fixup_fail() { ret void } + define void @vopd_no_combine_dependent_subreg() { ret void } ... --- @@ -541,3 +542,22 @@ $vgpr0 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec $vgpr1 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds + 4, implicit $exec ... + +--- +name: vopd_no_combine_dependent_subreg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_dependent_subreg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit-def $vgpr2_vgpr3, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr0, killed $vgpr3, implicit $mode, implicit $exec + ; PAIR-LABEL: name: vopd_no_combine_dependent_subreg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit-def $vgpr2_vgpr3, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr0, killed $vgpr3, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 0, implicit-def $vgpr2_vgpr3, implicit $exec + $vgpr5 = V_ADD_F32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec +...