diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1763,6 +1763,15 @@
   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
     Register DstReg = MI.getOperand(Idx).getReg();
     Register SrcReg = Operands[Idx];
+
+    // This combine may run after RegBankSelect, so we need to be aware of
+    // register banks.
+    const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+    if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+      SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+      MRI.setRegClassOrRegBank(SrcReg, DstCB);
+    }
+
     if (CanReuseInputDirectly)
       replaceRegWith(MRI, DstReg, SrcReg);
     else
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -127,7 +127,8 @@
 def AMDGPURegBankCombinerHelper : GICombinerHelper<
   "AMDGPUGenRegBankCombinerHelper",
-  [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+  [unmerge_merge, unmerge_cst, unmerge_undef,
+   zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
   let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
   let StateClass = "AMDGPURegBankCombinerHelperState";
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2055,7 +2055,9 @@
 // (y & x) | (z & ~x)
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
-  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+  (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
 >;

 // (y & C) | (z & ~C)
@@ -2080,7 +2082,9 @@
 // z ^ (x & (y ^ z))
 def : AMDGPUPat <
   (DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
-  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+  (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
 >;

 // 64-bit version
@@ -3196,7 +3200,10 @@
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i32:$x, i32:$z),
                         (and i32:$y, (or i32:$x, i32:$z))),
-  (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
+  (V_BFI_B32_e64 (V_XOR_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                                (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32)),
+                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32),
+                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32))
 >;

 def : AMDGPUPat <
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -110,17 +110,13 @@
 define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
 ; GCN-LABEL: dyn_extract_v8f32_s_v:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s0
-; GCN-NEXT:    v_mov_b32_e32 v2, s1
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NEXT:    v_mov_b32_e32 v2, s3
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    v_mov_b32_e32 v3, s2
+; GCN-NEXT:    v_mov_b32_e32 v3, s4
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 @@ -140,29 +136,21 @@ ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1 -; GFX10PLUS-NEXT: s_mov_b32 s0, s2 -; GFX10PLUS-NEXT: s_mov_b32 s2, s4 -; GFX10PLUS-NEXT: s_mov_b32 s3, s5 -; GFX10PLUS-NEXT: s_mov_b32 s4, s6 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: s_mov_b32 s5, s7 -; GFX10PLUS-NEXT: s_mov_b32 s6, s8 -; GFX10PLUS-NEXT: s_mov_b32 s7, s9 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s9, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel @@ -2183,10 +2171,8 @@ define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f32_s_v: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: v_mov_b32_e32 v1, s0 -; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s2 +; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -2205,23 +2191,17 @@ ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1 -; GFX10PLUS-NEXT: s_mov_b32 s0, s2 -; GFX10PLUS-NEXT: s_mov_b32 s2, s4 -; GFX10PLUS-NEXT: s_mov_b32 s3, s5 -; GFX10PLUS-NEXT: s_mov_b32 s4, s6 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: s_mov_b32 s5, s7 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s5, vcc_lo +; 
GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel @@ -2335,13 +2315,10 @@ define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f32_s_v: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s0 -; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s2 +; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_mov_b32_e32 v3, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s5 @@ -2361,26 +2338,19 @@ ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1 -; GFX10PLUS-NEXT: s_mov_b32 s0, s2 -; GFX10PLUS-NEXT: s_mov_b32 s2, s4 -; GFX10PLUS-NEXT: s_mov_b32 s3, s5 -; GFX10PLUS-NEXT: s_mov_b32 s4, s6 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: s_mov_b32 s5, s7 -; GFX10PLUS-NEXT: s_mov_b32 s6, s8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel @@ -3545,45 +3515,34 @@ define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_v: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s0 -; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s2 +; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: v_mov_b32_e32 v5, s4 +; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: v_mov_b32_e32 v7, s6 +; GCN-NEXT: v_mov_b32_e32 v7, s8 ; 
GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: v_mov_b32_e32 v8, s7 +; GCN-NEXT: v_mov_b32_e32 v8, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: v_mov_b32_e32 v9, s8 +; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: v_mov_b32_e32 v10, s9 +; GCN-NEXT: v_mov_b32_e32 v10, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 -; GCN-NEXT: v_mov_b32_e32 v11, s10 +; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_mov_b32_e32 v12, s13 @@ -3603,50 +3562,35 @@ ; ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s1 -; GFX10PLUS-NEXT: s_mov_b32 s0, s2 -; GFX10PLUS-NEXT: s_mov_b32 s2, s4 -; GFX10PLUS-NEXT: s_mov_b32 s3, s5 -; GFX10PLUS-NEXT: s_mov_b32 s4, s6 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: s_mov_b32 s5, s7 -; GFX10PLUS-NEXT: s_mov_b32 s6, s8 -; GFX10PLUS-NEXT: s_mov_b32 s7, s9 -; GFX10PLUS-NEXT: s_mov_b32 s8, s10 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: s_mov_b32 s9, s11 -; GFX10PLUS-NEXT: s_mov_b32 s10, s12 -; GFX10PLUS-NEXT: s_mov_b32 s11, s13 -; GFX10PLUS-NEXT: s_mov_b32 s12, s14 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: s_mov_b32 s13, s15 -; GFX10PLUS-NEXT: s_mov_b32 s14, s16 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 13, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -5583,8 +5583,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v0 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fshl_i64_48: @@ -5593,8 +5592,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i64_48: @@ -5603,9 +5601,8 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX11-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48) ret i64 %result @@ -6358,14 +6355,13 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, v1, v9, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5] -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v6 -; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v13, v8, v3, vcc ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7] +; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1 ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v15 ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v15 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1] @@ -6395,103 +6391,97 @@ ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] -; GFX10-NEXT: v_lshlrev_b32_e32 v12, 31, v6 -; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] -; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v18 +; GFX10-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] +; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 ; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8 -; GFX10-NEXT: v_lshlrev_b64 v[10:11], v18, v[2:3] -; GFX10-NEXT: v_or_b32_e32 v5, v5, v12 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5 ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 -; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[0:1] +; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 -; GFX10-NEXT: v_lshlrev_b64 v[12:13], v18, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] ; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] ; 
GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 -; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 -; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] -; GFX10-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] +; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo -; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[6:7] +; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 ; GFX10-NEXT: v_or_b32_e32 v14, v14, v16 ; GFX10-NEXT: v_or_b32_e32 v15, v15, v17 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo -; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v15, s4 -; GFX10-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v15, s4 +; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 -; GFX10-NEXT: v_or_b32_e32 v0, v12, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v0, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v1, s4 +; GFX10-NEXT: v_or_b32_e32 v0, v6, v4 ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 -; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 +; GFX10-NEXT: v_or_b32_e32 v2, v2, v8 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i128: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8 ; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] +; GFX11-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v18 -; GFX11-NEXT: v_lshlrev_b64 v[10:11], v18, v[2:3] -; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 +; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18 +; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5 +; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 -; GFX11-NEXT: v_lshlrev_b32_e32 v12, 31, v6 ; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8 -; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[0:1] -; GFX11-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v5, v5, v12 -; GFX11-NEXT: v_lshlrev_b64 v[12:13], v18, v[0:1] -; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] -; GFX11-NEXT: v_or_b32_e32 v10, v8, v10 -; GFX11-NEXT: v_or_b32_e32 v11, v9, v11 -; GFX11-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo +; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] +; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] +; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 +; 
GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo ; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 -; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 ; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 -; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo -; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] -; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[6:7] -; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] +; GFX11-NEXT: v_or_b32_e32 v10, v10, v8 +; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 +; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] +; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] +; GFX11-NEXT: v_or_b32_e32 v11, v11, v9 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 -; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 -; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] +; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo ; GFX11-NEXT: v_or_b32_e32 v14, v14, v16 ; GFX11-NEXT: v_or_b32_e32 v15, v15, v17 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 +; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11 +; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v15, s0 +; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 +; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0 -; GFX11-NEXT: v_or_b32_e32 v0, v12, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0 +; GFX11-NEXT: v_or_b32_e32 v0, v6, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v3, v3, v8 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v9 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) ret i128 %result @@ -6891,16 +6881,15 @@ ; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 -; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] ; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v2 +; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] ; GFX9-NEXT: s_sub_i32 s2, s4, 64 ; GFX9-NEXT: s_sub_i32 s3, 64, s4 -; GFX9-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64 ; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 @@ 
-6938,7 +6927,7 @@ ; GFX10-NEXT: s_sub_i32 s5, s8, 64 ; GFX10-NEXT: s_sub_i32 s6, 64, s8 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 31, v2 +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] @@ -6949,13 +6938,12 @@ ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 -; GFX10-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX10-NEXT: s_sub_i32 s0, 64, s4 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] ; GFX10-NEXT: s_sub_i32 s0, s4, 64 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 @@ -6993,7 +6981,7 @@ ; GFX11-NEXT: s_sub_i32 s5, s8, 64 ; GFX11-NEXT: s_sub_i32 s6, 64, s8 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64 -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 31, v2 +; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] @@ -7004,13 +6992,13 @@ ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0 -; GFX11-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s13, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX11-NEXT: s_sub_i32 s0, 64, s4 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] ; GFX11-NEXT: s_sub_i32 s0, s4, 64 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64 @@ -7025,16 +7013,16 @@ ; GFX11-NEXT: s_and_b32 s0, 1, s5 ; GFX11-NEXT: s_and_b32 s1, 1, s1 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] -; GFX11-NEXT: v_dual_cndmask_b32 v5, v9, v5 :: v_dual_cndmask_b32 v4, v8, v4 +; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0 +; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 @@ -8095,14 +8083,13 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v21, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v22, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v16, v1, v17, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23 ; GFX9-NEXT: v_cndmask_b32_e32 v21, v0, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v22, v1, v3, vcc ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[8:9] -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v10 -; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 
+; GFX9-NEXT: v_cndmask_b32_e32 v22, v16, v3, vcc ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[10:11] +; GFX9-NEXT: v_lshl_or_b32 v1, v10, 31, v1 ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v24 ; GFX9-NEXT: v_subrev_u32_e32 v23, 64, v24 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v24, v[0:1] @@ -8136,14 +8123,13 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v9, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_cndmask_b32_e32 v18, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13] -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 31, v14 -; GFX9-NEXT: v_or_b32_e32 v5, v5, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v20, v8, v7, vcc ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15] +; GFX9-NEXT: v_lshl_or_b32 v5, v14, 31, v5 ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v19 ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v19 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v19, v[4:5] @@ -8175,76 +8161,74 @@ ; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16 ; GFX10-NEXT: v_xor_b32_e32 v16, -1, v16 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] -; GFX10-NEXT: v_lshlrev_b32_e32 v21, 31, v10 -; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] ; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27 ; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v16 ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] -; GFX10-NEXT: v_or_b32_e32 v9, v9, v21 -; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 +; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, v9 +; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] ; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28 -; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] +; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 ; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] -; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 ; GFX10-NEXT: v_or_b32_e32 v18, v16, v18 ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 ; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] +; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] ; GFX10-NEXT: v_or_b32_e32 v19, v17, v19 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v28 -; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] -; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v27 +; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo ; GFX10-NEXT: v_or_b32_e32 v23, v23, v25 -; GFX10-NEXT: v_or_b32_e32 v24, v24, v26 -; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo -; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] +; GFX10-NEXT: v_or_b32_e32 v0, v24, v26 +; GFX10-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v23, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v17, v24, s4 -; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v22, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v22, v19, v3, s6 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27 +; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28 +; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s4 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] +; GFX10-NEXT: v_and_b32_e32 v24, 0x7f, v20 +; GFX10-NEXT: v_cndmask_b32_e32 v23, v19, v3, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6 -; GFX10-NEXT: 
v_cndmask_b32_e64 v8, v10, v9, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v0, s4 -; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v17, v9, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v0, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, v1, s4 ; GFX10-NEXT: v_or_b32_e32 v0, v21, v3 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v20 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, v1, s4 -; GFX10-NEXT: v_or_b32_e32 v1, v11, v8 -; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v23 -; GFX10-NEXT: v_or_b32_e32 v2, v2, v9 +; GFX10-NEXT: v_or_b32_e32 v1, v22, v8 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] -; GFX10-NEXT: v_lshlrev_b32_e32 v16, 31, v14 -; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v3 -; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5] -; GFX10-NEXT: v_lshlrev_b64 v[12:13], v23, v[6:7] +; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v24 +; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_and_b32_e32 v22, 0x7f, v3 +; GFX10-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v24 +; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5] +; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9 ; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] -; GFX10-NEXT: v_or_b32_e32 v9, v9, v16 -; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v25 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v23 -; GFX10-NEXT: v_lshlrev_b64 v[16:17], v23, v[4:5] +; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22 +; GFX10-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] +; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 ; GFX10-NEXT: v_or_b32_e32 v12, v10, v12 -; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v25 -; GFX10-NEXT: v_lshrrev_b64 v[18:19], v25, v[8:9] +; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v22 +; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v23 ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] ; GFX10-NEXT: v_or_b32_e32 v5, v11, v13 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] -; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v25 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo +; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v22 ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo -; GFX10-NEXT: v_lshrrev_b64 v[3:4], v25, v[14:15] +; GFX10-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 -; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v25 -; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v23 +; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v22 +; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v24 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6 @@ -8253,7 +8237,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 -; GFX10-NEXT: v_or_b32_e32 v3, v22, v24 +; GFX10-NEXT: v_or_b32_e32 v3, v23, v25 ; GFX10-NEXT: v_or_b32_e32 v4, v13, v5 ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 @@ -8264,89 +8248,93 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] ; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16 -; GFX11-NEXT: v_lshlrev_b32_e32 v21, 31, v10 ; GFX11-NEXT: v_xor_b32_e32 v16, -1, v16 -; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] 
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27 -; GFX11-NEXT: v_or_b32_e32 v9, v9, v21 +; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 ; GFX11-NEXT: v_and_b32_e32 v28, 0x7f, v16 -; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_lshl_or_b32 v9, v10, 31, v9 +; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] +; GFX11-NEXT: v_dual_cndmask_b32 v21, 0, v21 :: v_dual_cndmask_b32 v22, 0, v22 +; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27 ; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] -; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 -; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo ; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28 +; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 ; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] +; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v28 -; GFX11-NEXT: v_or_b32_e32 v18, v16, v18 -; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 ; GFX11-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] -; GFX11-NEXT: v_or_b32_e32 v19, v17, v19 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v28 -; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] +; GFX11-NEXT: v_or_b32_e32 v18, v16, v18 +; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 +; GFX11-NEXT: v_or_b32_e32 v19, v17, v19 ; GFX11-NEXT: v_or_b32_e32 v23, v23, v25 -; GFX11-NEXT: v_or_b32_e32 v24, v24, v26 -; GFX11-NEXT: v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19 -; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v27 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo +; GFX11-NEXT: v_or_b32_e32 v0, v24, v26 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27 +; GFX11-NEXT: v_and_b32_e32 v24, 0x7f, v20 ; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v23, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v10, v17, v24, s0 -; GFX11-NEXT: v_cndmask_b32_e32 v11, 0, v22, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v22, v19, v3, s2 +; GFX11-NEXT: v_dual_cndmask_b32 v2, v18, v2 :: v_dual_cndmask_b32 v23, v19, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v3, v16, v8, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v9, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v0, s0 -; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20 +; GFX11-NEXT: v_cndmask_b32_e64 v8, v17, v9, s1 +; GFX11-NEXT: v_sub_nc_u32_e32 v11, 64, v24 +; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v25, 0, v1, s0 ; GFX11-NEXT: v_or_b32_e32 v0, v21, v3 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v20 -; GFX11-NEXT: v_cndmask_b32_e64 v24, 0, v1, s0 -; GFX11-NEXT: v_or_b32_e32 v1, v11, v8 -; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v23 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v9 +; GFX11-NEXT: v_or_b32_e32 v1, v22, v8 ; GFX11-NEXT: v_lshrrev_b64 
v[8:9], 1, v[12:13] -; GFX11-NEXT: v_lshlrev_b32_e32 v16, 31, v14 -; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v3 -; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5] -; GFX11-NEXT: v_lshlrev_b64 v[12:13], v23, v[6:7] +; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5] +; GFX11-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] +; GFX11-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] +; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 +; GFX11-NEXT: v_and_b32_e32 v22, 0x7f, v3 +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v24 +; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9 ; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] -; GFX11-NEXT: v_or_b32_e32 v9, v9, v16 -; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v25 -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v23 -; GFX11-NEXT: v_lshlrev_b64 v[16:17], v23, v[4:5] ; GFX11-NEXT: v_or_b32_e32 v12, v10, v12 -; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v25 -; GFX11-NEXT: v_lshrrev_b64 v[18:19], v25, v[8:9] -; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] -; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v23 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] ; GFX11-NEXT: v_or_b32_e32 v5, v11, v13 -; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] -; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v25 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo +; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22 +; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v22 +; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] +; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22 +; GFX11-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo +; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] +; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] +; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22 +; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24 ; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 ; GFX11-NEXT: v_or_b32_e32 v18, v19, v21 -; GFX11-NEXT: v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5 -; GFX11-NEXT: v_lshrrev_b64 v[3:4], v25, v[14:15] ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v25 -; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v23 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e64 v8, v11, v9, s1 ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 -; GFX11-NEXT: v_or_b32_e32 v3, v22, v24 +; GFX11-NEXT: v_or_b32_e32 v3, v23, v25 ; GFX11-NEXT: v_or_b32_e32 v4, v13, v5 ; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -5645,8 +5645,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v0 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] -; GFX9-NEXT: 
v_lshlrev_b32_e32 v2, 27, v4 -; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v1, v4, 27, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fshr_i64_5: @@ -5655,8 +5654,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 27, v4 -; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshr_i64_5: @@ -5665,9 +5663,8 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 27, v4 -; GFX11-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshl_or_b32 v1, v4, 27, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) ret i64 %result @@ -7548,12 +7545,12 @@ ; GFX9-LABEL: v_fshr_i128_65: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 31, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, v2 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] -; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] -; GFX9-NEXT: v_or_b32_e32 v3, v5, v3 -; GFX9-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] +; GFX9-NEXT: v_lshl_or_b32 v3, v8, 31, v3 +; GFX9-NEXT: v_lshl_or_b32 v1, v0, 31, v5 +; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fshr_i128_65: @@ -7563,10 +7560,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 31, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v8 -; GFX10-NEXT: v_or_b32_e32 v1, v9, v5 -; GFX10-NEXT: v_or_b32_e32 v3, v0, v3 +; GFX10-NEXT: v_lshl_or_b32 v1, v0, 31, v5 +; GFX10-NEXT: v_lshl_or_b32 v3, v8, 31, v3 ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7574,14 +7569,13 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_dual_mov_b32 v8, v2 :: v_dual_lshlrev_b32 v9, 31, v0 +; GFX11-NEXT: v_mov_b32_e32 v8, v2 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 31, v8 -; GFX11-NEXT: v_or_b32_e32 v1, v9, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_or_b32_e32 v3, v0, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshl_or_b32 v1, v0, 31, v5 +; GFX11-NEXT: v_lshl_or_b32 v3, v8, 31, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_mov_b32_e32 v0, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -2,21 +2,34 @@ ; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs -o - %s | FileCheck %s ; Make sure the waterfall loop does not fail the 
verifier after regalloc fast
+;
+; FIXME: There are a lot of extra spills that aren't needed. This is because the unmerge_merge
+; combine runs after RegBankSelect and inserts a lot of COPY instructions, while the original
+; merge instruction (G_BUILD_VECTOR) stays because it has more than one use.
+; Those spills are not present when optimizations are enabled.
 define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-LABEL: waterfall_loop:
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
 ; CHECK-NEXT:    s_xor_saveexec_b32 s4, -1
-; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s4
+; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v15, v1
+; CHECK-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v14, v2
+; CHECK-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v13, v3
+; CHECK-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v12, v4
+; CHECK-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v11, v5
+; CHECK-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v10, v6
+; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v9, v7
+; CHECK-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; CHECK-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
 ; CHECK-NEXT:    v_mov_b32_e32 v1, v15
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v14
@@ -60,15 +73,15 @@
 ; CHECK-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; CHECK-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v7, v9
-; CHECK-NEXT:    v_mov_b32_e32 v6, v10
-; CHECK-NEXT:    v_mov_b32_e32 v5, v11
-; CHECK-NEXT:    v_mov_b32_e32 v4, v12
-; CHECK-NEXT:    v_mov_b32_e32 v3, v13
-; CHECK-NEXT:    v_mov_b32_e32 v2, v14
-; CHECK-NEXT:    v_mov_b32_e32 v1, v15
-; CHECK-NEXT:    v_mov_b32_e32 v0, v16
 ; CHECK-NEXT:    v_readfirstlane_b32 s12, v7
 ; CHECK-NEXT:    v_readfirstlane_b32 s10, v6
 ; CHECK-NEXT:    v_readfirstlane_b32 s9, v5
@@ -133,20 +146,20 @@
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; CHECK-NEXT:
s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_readlane_b32 s4, v8, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -89,38 +89,27 @@ ; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s4 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s5 -; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s6 -; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v2, 0x40400000 +; GPRIDX-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s7 -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s8 -; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v4, 0x40a00000 +; GPRIDX-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s9 -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v5, 0x40c00000 +; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s10 -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v6, 0x40e00000 +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 -; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v7, 0x41000000 +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 -; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v15, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] @@ -129,39 +118,23 @@ ; 
GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s11, 0x41000000 -; GFX10-NEXT: s_mov_b32 s4, 1.0 -; GFX10-NEXT: s_mov_b32 s10, 0x40e00000 -; GFX10-NEXT: s_mov_b32 s9, 0x40c00000 -; GFX10-NEXT: s_mov_b32 s8, 0x40a00000 -; GFX10-NEXT: s_mov_b32 s7, 4.0 -; GFX10-NEXT: s_mov_b32 s6, 0x40400000 -; GFX10-NEXT: s_mov_b32 s5, 2.0 -; GFX10-NEXT: v_mov_b32_e32 v15, s11 -; GFX10-NEXT: v_mov_b32_e32 v8, s4 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX10-NEXT: v_mov_b32_e32 v9, s5 -; GFX10-NEXT: v_mov_b32_e32 v10, s6 -; GFX10-NEXT: v_mov_b32_e32 v11, s7 -; GFX10-NEXT: v_mov_b32_e32 v12, s8 -; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, s9 -; GFX10-NEXT: v_mov_b32_e32 v14, s10 -; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v9 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x41000000, v0, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v0, v8 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -169,35 +142,23 @@ ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_mov_b32 s7, 0x41000000 -; GFX11-NEXT: s_mov_b32 s1, 2.0 -; GFX11-NEXT: s_mov_b32 s0, 1.0 -; GFX11-NEXT: s_mov_b32 s6, 0x40e00000 -; GFX11-NEXT: s_mov_b32 s5, 0x40c00000 -; GFX11-NEXT: s_mov_b32 s4, 0x40a00000 -; GFX11-NEXT: s_mov_b32 s3, 4.0 -; GFX11-NEXT: s_mov_b32 s2, 0x40400000 -; GFX11-NEXT: v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s6 -; GFX11-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NEXT: v_dual_mov_b32 v11, s3 :: v_dual_mov_b32 v10, s2 -; GFX11-NEXT: v_dual_mov_b32 v13, s5 :: v_dual_mov_b32 v12, s4 -; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo ; GFX11-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 5, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v9 -; GFX11-NEXT: v_dual_cndmask_b32 v7, v15, v0 :: v_dual_mov_b32 v0, v8 +; GFX11-NEXT: v_dual_cndmask_b32 v7, 0x41000000, v0 :: v_dual_mov_b32 v0, v8 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %insert = insertelement <8 x float> , float %val, i32 %idx @@ -207,110 +168,75 @@ define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v: ; GPRIDX: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_mov_b32 s3, s5 -; GPRIDX-NEXT: s_mov_b32 s5, s7 -; GPRIDX-NEXT: s_mov_b32 s7, s9 -; GPRIDX-NEXT: s_mov_b32 s0, s2 -; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s1 -; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v2, s3 +; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s2 -; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v9, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 +; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s3 -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 +; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s4 -; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 +; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s5 -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 +; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s6 -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v7, vcc +; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc +; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v10, vcc ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 ; GPRIDX-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: dyn_insertelement_v8f32_s_s_v: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: v_mov_b32_e32 v15, s7 -; GFX10-NEXT: v_mov_b32_e32 v8, s0 +; GFX10-NEXT: v_mov_b32_e32 v7, s10 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: v_mov_b32_e32 v9, s1 -; 
-; GFX10-NEXT: v_mov_b32_e32 v10, s2
-; GFX10-NEXT: v_mov_b32_e32 v11, s3
-; GFX10-NEXT: v_mov_b32_e32 v12, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX10-NEXT: v_mov_b32_e32 v13, s5
-; GFX10-NEXT: v_mov_b32_e32 v14, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v14, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v7, vcc_lo
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v8f32_s_s_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s6
-; GFX11-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
+; GFX11-NEXT: v_mov_b32_e32 v7, s10
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: v_dual_mov_b32 v11, s3 :: v_dual_mov_b32 v10, s2
-; GFX11-NEXT: v_dual_mov_b32 v13, s5 :: v_dual_mov_b32 v12, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v14, s10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX11-NEXT: v_mov_b32_e32 v0, v8
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, s10, vcc_lo
+; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_cndmask_b32 v7, s9, v7
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <8 x float> %vec, float %val, i32 %idx
@@ -320,38 +246,30 @@
 define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s7
-; GPRIDX-NEXT: v_mov_b32_e32 v8, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 0
-; GPRIDX-NEXT: v_mov_b32_e32 v9, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 1
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v9, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 2
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 3
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 4
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 5
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 7
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v15, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
@@ -436,111 +354,75 @@ define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s7
-; GPRIDX-NEXT: v_mov_b32_e32 v8, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v9, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v15, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v8f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: v_mov_b32_e32 v15, s7
-; GFX10-NEXT: v_mov_b32_e32 v8, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v9, s1
-; GFX10-NEXT: v_mov_b32_e32 v10, s2
-; GFX10-NEXT: v_mov_b32_e32 v11, s3
-; GFX10-NEXT: v_mov_b32_e32 v12, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v13, s5
-; GFX10-NEXT: v_mov_b32_e32 v14, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v8f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s6
-; GFX11-NEXT: v_dual_mov_b32 v9, s1 :: v_dual_mov_b32 v8, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v11, s3 :: v_dual_mov_b32 v10, s2
-; GFX11-NEXT: v_dual_mov_b32 v13, s5 :: v_dual_mov_b32 v12, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
 ; GFX11-NEXT: v_mov_b32_e32 v1, v9
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v15, v0 :: v_dual_mov_b32 v0, v8
+; GFX11-NEXT: v_dual_cndmask_b32 v7, s9, v0 :: v_dual_mov_b32 v0, v8
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <8 x float> %vec, float %val, i32 %idx
@@ -2750,122 +2632,81 @@ define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_v(<9 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s8, s10
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: v_mov_b32_e32 v18, s8
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v11, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v9
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v9f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s8, s10
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: v_mov_b32_e32 v18, s8
-; GFX10-NEXT: v_mov_b32_e32 v10, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v11, s1
-; GFX10-NEXT: v_mov_b32_e32 v12, s2
-; GFX10-NEXT: v_mov_b32_e32 v13, s3
-; GFX10-NEXT: v_mov_b32_e32 v14, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v15, s5
-; GFX10-NEXT: v_mov_b32_e32 v16, s6
-; GFX10-NEXT: v_mov_b32_e32 v17, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v10
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v9f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s8, s10
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: v_dual_mov_b32 v18, s8 :: v_dual_mov_b32 v17, s7
-; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
-; GFX11-NEXT: v_mov_b32_e32 v10, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v14, s4 :: v_dual_mov_b32 v13, s3
-; GFX11-NEXT: v_dual_mov_b32 v16, s6 :: v_dual_mov_b32 v15, s5
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX11-NEXT: v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, v18, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, s10, v0
 ; GFX11-NEXT: v_mov_b32_e32 v0, v10
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
@@ -3025,134 +2866,89 @@ define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_v(<10 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_mov_b32 s9, s11
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s8, s10
-; GPRIDX-NEXT: v_mov_b32_e32 v19, s9
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v16, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v17, s7
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v18, s8
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v12, s10
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 8, v1
-; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v18, v0, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v19, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v10
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v10f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: s_mov_b32 s9, s11
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s8, s10
-; GFX10-NEXT: v_mov_b32_e32 v19, s9
-; GFX10-NEXT: v_mov_b32_e32 v10, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v11, s1
-; GFX10-NEXT: v_mov_b32_e32 v12, s2
-; GFX10-NEXT: v_mov_b32_e32 v13, s3
-; GFX10-NEXT: v_mov_b32_e32 v14, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v15, s5
-; GFX10-NEXT: v_mov_b32_e32 v16, s6
-; GFX10-NEXT: v_mov_b32_e32 v17, s7
-; GFX10-NEXT: v_mov_b32_e32 v18, s8
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v19, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v10
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v10f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: s_mov_b32 s9, s11
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s8, s10
-; GFX11-NEXT: v_dual_mov_b32 v19, s9 :: v_dual_mov_b32 v18, s8
-; GFX11-NEXT: v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX11-NEXT: v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v14, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v15, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v16, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v17, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v18, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
 ; GFX11-NEXT: v_mov_b32_e32 v1, v11
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v19, v0 :: v_dual_mov_b32 v0, v10
+; GFX11-NEXT: v_dual_cndmask_b32 v9, s11, v0 :: v_dual_mov_b32 v0, v10
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <10 x float> %vec, float %val, i32 %idx
@@ -3320,145 +3116,95 @@ define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_v(<11 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s8, s10
-; GPRIDX-NEXT: s_mov_b32 s10, s12
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_mov_b32 s9, s11
-; GPRIDX-NEXT: v_mov_b32_e32 v22, s10
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v16, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v17, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v18, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v21, s9
-; GPRIDX-NEXT: v_mov_b32_e32 v20, s8
-; GPRIDX-NEXT: v_mov_b32_e32 v19, s7
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 9, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 10, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 7, v1
-; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v19, v0, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v21, v0, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v22, v0, s[2:3]
+; GPRIDX-NEXT: v_mov_b32_e32 v13, s11
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v11
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v11f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s8, s10
-; GFX10-NEXT: s_mov_b32 s10, s12
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: s_mov_b32 s9, s11
-; GFX10-NEXT: v_mov_b32_e32 v22, s10
-; GFX10-NEXT: v_mov_b32_e32 v12, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v13, s1
-; GFX10-NEXT: v_mov_b32_e32 v14, s2
-; GFX10-NEXT: v_mov_b32_e32 v15, s3
-; GFX10-NEXT: v_mov_b32_e32 v16, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v17, s5
-; GFX10-NEXT: v_mov_b32_e32 v18, s6
-; GFX10-NEXT: v_mov_b32_e32 v19, s7
-; GFX10-NEXT: v_mov_b32_e32 v20, s8
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_mov_b32_e32 v21, s9
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v12
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v11f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s8, s10
-; GFX11-NEXT: s_mov_b32 s10, s12
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: s_mov_b32 s9, s11
-; GFX11-NEXT: v_dual_mov_b32 v22, s10 :: v_dual_mov_b32 v21, s9
-; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v13, s1
-; GFX11-NEXT: v_mov_b32_e32 v12, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v16, s4 :: v_dual_mov_b32 v15, s3
-; GFX11-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v17, s5
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_dual_mov_b32 v20, s8 :: v_dual_mov_b32 v19, s7
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
-; GFX11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, v22, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, s12, v0
 ; GFX11-NEXT: v_mov_b32_e32 v0, v12
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
@@ -3637,157 +3383,103 @@ define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_v(<12 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_mov_b32 s9, s11
-; GPRIDX-NEXT: s_mov_b32 s11, s13
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s8, s10
-; GPRIDX-NEXT: s_mov_b32 s10, s12
-; GPRIDX-NEXT: v_mov_b32_e32 v23, s11
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v15, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v16, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v17, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v22, s10
-; GPRIDX-NEXT: v_mov_b32_e32 v21, s9
-; GPRIDX-NEXT: v_mov_b32_e32 v20, s8
-; GPRIDX-NEXT: v_mov_b32_e32 v19, s7
-; GPRIDX-NEXT: v_mov_b32_e32 v18, s6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 8, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 9, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 10, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 11, v1
-; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v1
-; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v18, v0, s[8:9]
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc
-; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v20, v0, s[0:1]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v21, v0, s[2:3]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v22, v0, s[4:5]
-; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v23, v0, s[6:7]
+; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v14, s12
+; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1
+; GPRIDX-NEXT: v_mov_b32_e32 v15, s13
+; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v1
+; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v15, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v12
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v13
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v12f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: s_mov_b32 s9, s11
-; GFX10-NEXT: s_mov_b32 s11, s13
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s8, s10
-; GFX10-NEXT: s_mov_b32 s10, s12
-; GFX10-NEXT: v_mov_b32_e32 v23, s11
-; GFX10-NEXT: v_mov_b32_e32 v12, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v13, s1
-; GFX10-NEXT: v_mov_b32_e32 v14, s2
-; GFX10-NEXT: v_mov_b32_e32 v15, s3
-; GFX10-NEXT: v_mov_b32_e32 v16, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v17, s5
-; GFX10-NEXT: v_mov_b32_e32 v18, s6
-; GFX10-NEXT: v_mov_b32_e32 v19, s7
-; GFX10-NEXT: v_mov_b32_e32 v20, s8
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_mov_b32_e32 v21, s9
-; GFX10-NEXT: v_mov_b32_e32 v22, s10
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v23, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, s13, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v12
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v12f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: s_mov_b32 s9, s11
-; GFX11-NEXT: s_mov_b32 s11, s13
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s8, s10
-; GFX11-NEXT: s_mov_b32 s10, s12
-; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10
-; GFX11-NEXT: v_dual_mov_b32 v13, s1 :: v_dual_mov_b32 v12, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v15, s3 :: v_dual_mov_b32 v14, s2
-; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
-; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v14, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v15, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v17, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v18, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v20, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v21, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v22, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1
 ; GFX11-NEXT: v_mov_b32_e32 v1, v13
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v23, v0 :: v_dual_mov_b32 v0, v12
+; GFX11-NEXT: v_dual_cndmask_b32 v11, s13, v0 :: v_dual_mov_b32 v0, v12
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <12 x float> %vec, float %val, i32 %idx
@@ -5562,99 +5254,48 @@ define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s6
-; GPRIDX-NEXT: v_mov_b32_e32 v7, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 0
-; GPRIDX-NEXT: v_mov_b32_e32 v8, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v1, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 1
-; GPRIDX-NEXT: v_mov_b32_e32 v9, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v8, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 2
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v9, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 3
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 4
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 6
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v13, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v7
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
-; GFX10-LABEL: dyn_insertelement_v7f32_s_v_s:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v13, s6
-; GFX10-NEXT: v_mov_b32_e32 v7, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0
-; GFX10-NEXT: v_mov_b32_e32 v8, s1
-; GFX10-NEXT: v_mov_b32_e32 v9, s2
-; GFX10-NEXT: v_mov_b32_e32 v10, s3
-; GFX10-NEXT: v_mov_b32_e32 v11, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1
-; GFX10-NEXT: v_mov_b32_e32 v12, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v11, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v12, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v13, v0, vcc_lo
-; GFX10-NEXT: v_mov_b32_e32 v0, v7
-; GFX10-NEXT: ; return to shader part epilog
-;
-; GFX11-LABEL: dyn_insertelement_v7f32_s_v_s:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: v_dual_mov_b32 v13, s6 :: v_dual_mov_b32 v12, s5
-; GFX11-NEXT: v_dual_mov_b32 v9, s2 :: v_dual_mov_b32 v8, s1
-; GFX11-NEXT: v_mov_b32_e32 v7, s0
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, s4 :: v_dual_mov_b32 v10, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v11, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v12, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v13, v0, vcc_lo
-; GFX11-NEXT: v_mov_b32_e32 v0, v7
-; GFX11-NEXT: ; return to shader part epilog
+; GFX10PLUS-LABEL: dyn_insertelement_v7f32_s_v_s:
+; GFX10PLUS: ; %bb.0: ; %entry
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, s2, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s3, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
+; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v7
+; GFX10PLUS-NEXT: ; return to shader part epilog
 entry:
   %insert = insertelement <7 x float> %vec, float %val, i32 %idx
   ret <7 x float> %insert
@@ -5663,99 +5304,67 @@ define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) {
 ; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v:
 ; GPRIDX: ; %bb.0: ; %entry
-; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_mov_b32 s2, s4
-; GPRIDX-NEXT: s_mov_b32 s4, s6
-; GPRIDX-NEXT: s_mov_b32 s6, s8
-; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s3, s5
-; GPRIDX-NEXT: s_mov_b32 s5, s7
-; GPRIDX-NEXT: v_mov_b32_e32 v14, s6
-; GPRIDX-NEXT: v_mov_b32_e32 v8, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v9, s1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
+; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v10, s2
-; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
+; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v11, s3
-; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
+; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v12, s4
-; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
+; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1
-; GPRIDX-NEXT: v_mov_b32_e32 v13, s5
-; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v9, s7
+; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc
+; GPRIDX-NEXT: v_mov_b32_e32 v10, s8
+; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v9, v0, vcc
 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1
-; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc
+; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc
 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
 ; GPRIDX-NEXT: v_mov_b32_e32 v1, v7
 ; GPRIDX-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: dyn_insertelement_v7f32_s_v_v:
 ; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v14, s6
-; GFX10-NEXT: v_mov_b32_e32 v8, s0
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_mov_b32_e32 v9, s1
-; GFX10-NEXT: v_mov_b32_e32 v10, s2
-; GFX10-NEXT: v_mov_b32_e32 v11, s3
-; GFX10-NEXT: v_mov_b32_e32 v12, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX10-NEXT: v_mov_b32_e32 v13, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo
 ; GFX10-NEXT: v_mov_b32_e32 v0, v8
 ; GFX10-NEXT: ; return to shader part epilog
 ;
 ; GFX11-LABEL: dyn_insertelement_v7f32_s_v_v:
 ; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v13, s5
-; GFX11-NEXT: v_dual_mov_b32 v10, s2 :: v_dual_mov_b32 v9, s1
-; GFX11-NEXT: v_mov_b32_e32 v8, s0
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v11, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1
-; GFX11-NEXT: v_dual_mov_b32 v1, v7 :: v_dual_cndmask_b32 v6, v14, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, v7 :: v_dual_cndmask_b32 v6, s8, v0
 ; GFX11-NEXT: v_mov_b32_e32 v0, v8
 ; GFX11-NEXT: ; return to shader part epilog
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -113,21 +113,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -176,21 +172,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
@@ -72,21 +72,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
@@ -98,7 +94,7 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
 ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7)
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -108,7 +104,7 @@
 ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: $vgpr0 = COPY [[COPY19]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]]
 ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 %cast = bitcast i32 %ret to float
@@ -139,21 +135,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -155,21 +155,17 @@
 ; GFX908-NEXT: bb.2:
 ; GFX908-NEXT: successors: %bb.3(0x80000000)
 ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
- ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+ ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -210,21 +206,17 @@
 ; GFX90A-NEXT: bb.2:
 ; GFX90A-NEXT: successors: %bb.3(0x80000000)
 ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
- ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+ ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
 ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -269,21 +261,17 @@
 ; GFX908-NEXT: bb.2:
 ; GFX908-NEXT: successors: %bb.3(0x80000000)
 ; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
@@ -322,21 +310,17 @@
 ; GFX90A-NEXT: bb.2:
 ; GFX90A-NEXT: successors: %bb.3(0x80000000)
 ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -163,21 +163,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
@@ -217,21 +213,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -113,21 +113,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -62,21 +62,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -118,21 +114,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
@@ -479,21 +471,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -535,21 +523,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -684,8 +668,8 @@
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
 ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
 ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %voffset = add i32 %voffset.base, 4096
@@ -818,21 +802,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -872,34 +852,30 @@
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.3:
 ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
+ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
@@ -172,21 +172,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; UNPACKED-NEXT: {{ $}}
@@ -224,21 +220,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
- ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
+ ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; PACKED-NEXT: {{ $}}
@@ -425,12 +417,12 @@
 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; UNPACKED-NEXT: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
 ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
+ ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
 ; UNPACKED-NEXT: S_ENDPGM 0
 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
 ; PACKED: bb.1 (%ir-block.0):
@@ -446,8 +438,8 @@
 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; PACKED-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
- ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
+ ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
 ; PACKED-NEXT: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4096
 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -473,7 +465,7 @@
 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
+ ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
 ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec
@@ -485,28 +477,24 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -534,34 +522,30 @@ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit 
$exec + ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], 
[[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -128,21 +128,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} @@ -267,8 +263,8 @@ ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -297,34 +293,30 @@ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -64,21 +64,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} @@ -163,21 +159,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec @@ -521,21 +513,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} @@ -650,8 +638,8 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -751,8 +739,8 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -776,34 +764,30 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -840,21 +824,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -161,21 +161,17 @@ ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec @@ -215,21 +211,17 @@ ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -112,21 +112,17 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -140,21 +140,17 @@ ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -190,21 +186,17 @@ ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} @@ -246,21 +238,17 @@ ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec @@ -299,21 +287,17 @@ ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit 
$exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -359,21 +343,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -413,21 +393,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
@@ -55,21 +55,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; UNPACKED-NEXT: {{ $}}
@@ -105,21 +101,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; PACKED-NEXT: {{ $}}
@@ -161,21 +153,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -214,21 +202,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -274,21 +258,17 @@
 ; UNPACKED-NEXT: bb.2:
 ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
 ; UNPACKED-NEXT: {{ $}}
- ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -328,21 +308,17 @@
 ; PACKED-NEXT: bb.2:
 ; PACKED-NEXT: successors: %bb.3(0x80000000)
 ; PACKED-NEXT: {{ $}}
- ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
@@ -132,21 +132,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -188,21 +184,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -248,21 +240,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -614,21 +602,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
@@ -674,21 +658,17 @@
 ; CHECK-NEXT: bb.2:
 ; CHECK-NEXT: successors: %bb.3(0x80000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
 ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
 ; FIXME: Merge with regbankselect, which mostly overlaps when all types supported.
@@ -2686,6 +2686,7 @@
 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2699,32 +2700,32 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
@@ -2732,6 +2733,7 @@
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2745,32 +2747,32 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
@@ -2778,6 +2780,7 @@
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2791,32 +2794,32 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
@@ -2830,6 +2833,7 @@
 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2841,32 +2845,32 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
@@ -2874,6 +2878,7 @@
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2885,32 +2890,32 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
@@ -2918,6 +2923,7 @@
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2929,32 +2935,32 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
@@ -2969,6 +2975,7 @@
 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -2984,32 +2991,32 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
@@ -3017,6 +3024,7 @@
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3032,32 +3040,32 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT:
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: @@ -3065,6 +3073,7 @@ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3080,32 +3089,32 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; 
GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: @@ -3120,6 +3129,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX6: bb.1 (%ir-block.0): + ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3131,32 +3141,32 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: successors: %bb.3(0x80000000) + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: - ; GFX6-NEXT: successors: %bb.4, %bb.2 + ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: successors: %bb.5(0x80000000) + ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: @@ -3164,6 +3174,7 @@ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): + ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3175,32 +3186,32 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: @@ -3208,6 +3219,7 @@ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3219,32 +3231,32 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: @@ -3258,6 +3270,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX6: bb.1 (%ir-block.0): + ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3271,32 +3284,32 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: successors: %bb.3(0x80000000) + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: - ; GFX6-NEXT: successors: %bb.4, %bb.2 + ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: successors: %bb.5(0x80000000) + ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: @@ -3304,6 +3317,7 @@ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): + ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3317,32 +3331,32 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: @@ -3350,6 +3364,7 @@ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3361,32 +3376,32 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: @@ -3401,6 +3416,7 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX6: bb.1 (%ir-block.0): + ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3412,26 +3428,24 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: successors: %bb.3(0x80000000) + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: - ; GFX6-NEXT: successors: %bb.4, %bb.2 + ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -3439,29 +3453,32 @@ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: successors: %bb.5(0x80000000) + ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; 
GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): + ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3473,26 +3490,24 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; 
GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -3500,29 +3515,32 @@ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -3534,26 +3552,24 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit 
$exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -3561,26 +3577,28 @@ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3592,6 +3610,7 @@ define 
amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3607,26 +3626,24 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3634,29 +3651,32 @@
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3672,26 +3692,24 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3699,29 +3717,32 @@
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3737,26 +3758,24 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3764,26 +3783,28 @@
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 %soffset = add i32 %soffset.base, 4068
 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
@@ -3793,6 +3814,7 @@
 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3808,26 +3830,24 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3835,29 +3855,32 @@
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3873,26 +3896,24 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3900,29 +3921,32 @@
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -3938,26 +3962,24 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -3965,26 +3987,28 @@
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]]
- ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]]
- ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]]
- ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]]
- ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]]
- ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]]
- ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]]
- ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]]
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY17]]
 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 %soffset = add i32 %soffset.base, 4096
 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
@@ -3994,6 +4018,7 @@
 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4006,26 +4031,24 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4033,29 +4056,32 @@
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4068,26 +4094,24 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4095,29 +4119,32 @@
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4130,26 +4157,24 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4157,26 +4182,28 @@
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]]
 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 %soffset = add i32 %offset.base, 5000
 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
@@ -4186,6 +4213,7 @@
 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
 ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: successors: %bb.2(0x80000000)
 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4198,26 +4226,24 @@
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.2:
- ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX6-NEXT: successors: %bb.3(0x80000000)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.3:
- ; GFX6-NEXT: successors: %bb.4, %bb.2
+ ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4225,29 +4251,32 @@
 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: successors: %bb.5(0x80000000)
+ ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: bb.5:
 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
 ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: successors: %bb.2(0x80000000)
 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4260,26 +4289,24 @@
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.2:
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX7-NEXT: successors: %bb.3(0x80000000)
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.3:
- ; GFX7-NEXT: successors: %bb.4, %bb.2
+ ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4287,29 +4314,32 @@
 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: successors: %bb.5(0x80000000)
+ ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: bb.5:
 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
 ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: successors: %bb.2(0x80000000)
 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -4322,26 +4352,24 @@
 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.2:
- ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX8-NEXT: successors: %bb.3(0x80000000)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
- ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.3:
- ; GFX8-NEXT: successors: %bb.4, %bb.2
+ ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
@@ -4349,26 +4377,28 @@
 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: successors: %bb.5(0x80000000)
+ ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: bb.5:
 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
- ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
- ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
- ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
- ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
- ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
- ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
- ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
- ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]]
- ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]]
- ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]]
- ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]]
- ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]]
- ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]]
- ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]]
- ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]]
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
+ ; GFX8-NEXT:
[[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4378,6 +4408,7 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: bb.1 (%ir-block.0): + ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4390,26 +4421,24 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: successors: %bb.3(0x80000000) + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY7]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: - ; GFX6-NEXT: successors: %bb.4, %bb.2 + ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -4417,29 +4446,32 @@ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: successors: %bb.5(0x80000000) + ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): + ; GFX7-NEXT: 
successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4452,26 +4484,24 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -4479,29 +4509,32 @@ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4514,26 +4547,24 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) @@ -4541,26 +4572,28 @@ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4570,6 +4603,7 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX6: bb.1 (%ir-block.0): + ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4581,26 +4615,24 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: successors: %bb.3(0x80000000) + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: - ; GFX6-NEXT: successors: %bb.4, %bb.2 + ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) @@ -4608,29 +4640,32 @@ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: successors: %bb.5(0x80000000) + ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): + ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4642,26 +4677,24 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: successors: %bb.3(0x80000000) + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = 
COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: - ; GFX7-NEXT: successors: %bb.4, %bb.2 + ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) @@ -4669,29 +4702,32 @@ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: successors: %bb.5(0x80000000) + ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): + ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -4703,26 +4739,24 @@ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: successors: %bb.3(0x80000000) + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: - ; GFX8-NEXT: successors: %bb.4, %bb.2 + ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET 
[[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) @@ -4730,26 +4764,28 @@ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: successors: %bb.5(0x80000000) + ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -4864,9 +4900,9 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm @@ -4881,9 +4917,9 @@ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm @@ -4898,9 +4934,9 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s @@ -4922,9 +4958,9 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $vgpr0 = COPY 
[[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm @@ -4939,9 +4975,9 @@ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm @@ -4956,9 +4992,9 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v @@ -5037,8 +5073,8 @@ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr @@ -5054,8 +5090,8 @@ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7-NEXT: %10:vgpr_32, dead 
%16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
-  ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+  ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+  ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
   ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
   ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
@@ -5071,8 +5107,8 @@
   ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
   ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
   ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
-  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+  ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
   ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
   ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
   %offset.base = add i32 %offset.v, 1024
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
@@ -123,21 +123,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
@@ -189,21 +185,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
@@ -79,21 +79,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
@@ -106,7 +102,7 @@
   ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
   ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
   ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
+  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
   ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
   ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
   ; CHECK-NEXT: {{ $}}
@@ -116,7 +112,7 @@
   ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.5:
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY21]]
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]]
   ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   %cast = bitcast i32 %ret to float
@@ -149,21 +145,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -169,21 +169,17 @@
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT: successors: %bb.3(0x80000000)
   ; GFX908-NEXT: {{ $}}
-  ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; GFX908-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; GFX908-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; GFX908-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
@@ -227,21 +223,17 @@
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT: successors: %bb.3(0x80000000)
   ; GFX90A-NEXT: {{ $}}
-  ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
@@ -289,21 +281,17 @@
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT: successors: %bb.3(0x80000000)
   ; GFX908-NEXT: {{ $}}
-  ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; GFX908-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -344,21 +332,17 @@
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT: successors: %bb.3(0x80000000)
   ; GFX90A-NEXT: {{ $}}
-  ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
@@ -176,21 +176,17 @@
   ; UNPACKED-NEXT: bb.2:
   ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
   ; UNPACKED-NEXT: {{ $}}
-  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -211,25 +207,25 @@
   ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; UNPACKED-NEXT: {{ $}}
   ; UNPACKED-NEXT: bb.5:
-  ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
-  ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
-  ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
-  ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
+  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
+  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
+  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
+  ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
   ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-  ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY17]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY18]], implicit $exec
   ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
-  ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY19]], [[V_AND_B32_e64_1]], implicit $exec
   ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY15]], [[COPY20]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY16]], [[COPY21]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY22]], [[V_AND_B32_e64_3]], implicit $exec
   ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
   ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
   ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
@@ -254,21 +250,17 @@
   ; PACKED-NEXT: bb.2:
   ; PACKED-NEXT: successors: %bb.3(0x80000000)
   ; PACKED-NEXT: {{ $}}
-  ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -289,10 +281,10 @@
   ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; PACKED-NEXT: {{ $}}
   ; PACKED-NEXT: bb.5:
-  ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
-  ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
-  ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]]
-  ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]]
+  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
+  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
+  ; PACKED-NEXT: $vgpr0 = COPY [[COPY13]]
+  ; PACKED-NEXT: $vgpr1 = COPY [[COPY14]]
   ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret <4 x half> %val
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
@@ -122,21 +122,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -157,14 +153,14 @@
   ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.5:
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
-  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]]
-  ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]]
-  ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]]
-  ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]]
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]]
+  ; CHECK-NEXT: $vgpr1 = COPY [[COPY14]]
+  ; CHECK-NEXT: $vgpr2 = COPY [[COPY15]]
+  ; CHECK-NEXT: $vgpr3 = COPY [[COPY16]]
   ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret <4 x float> %val
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -193,21 +193,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
@@ -155,21 +155,17 @@
   ; UNPACKED-NEXT: bb.2:
   ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
   ; UNPACKED-NEXT: {{ $}}
-  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
@@ -213,21 +209,17 @@
   ; PACKED-NEXT: bb.2:
   ; PACKED-NEXT: successors: %bb.3(0x80000000)
   ; PACKED-NEXT: {{ $}}
-  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; PACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; PACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; PACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; PACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
@@ -117,21 +117,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
@@ -122,21 +122,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY21]], [[COPY19]], implicit $exec
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
+  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -216,21 +216,17 @@
   ; PACKED-NEXT: bb.2:
   ; PACKED-NEXT: successors: %bb.3(0x80000000)
   ; PACKED-NEXT: {{ $}}
-  ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -251,10 +247,10 @@
   ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; PACKED-NEXT: {{ $}}
   ; PACKED-NEXT: bb.5:
-  ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
-  ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
-  ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]]
-  ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]]
+  ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
+  ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
+  ; PACKED-NEXT: $vgpr0 = COPY [[COPY13]]
+  ; PACKED-NEXT: $vgpr1 = COPY [[COPY14]]
   ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
   ; UNPACKED: bb.1 (%ir-block.0):
@@ -276,21 +272,17 @@
   ; UNPACKED-NEXT: bb.2:
   ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
   ; UNPACKED-NEXT: {{ $}}
-  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -311,25 +303,25 @@
   ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; UNPACKED-NEXT: {{ $}}
   ; UNPACKED-NEXT: bb.5:
-  ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
-  ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
-  ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
-  ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
+  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
+  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
+  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
+  ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3
   ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-  ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY17]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY18]], implicit $exec
   ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
-  ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY19]], [[V_AND_B32_e64_1]], implicit $exec
   ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec
-  ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY15]], [[COPY20]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY16]], [[COPY21]], implicit $exec
+  ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY22]], [[V_AND_B32_e64_3]], implicit $exec
   ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
   ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
   ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -145,21 +145,17 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
-  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
   ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
-  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
-  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
-  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
-  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
-  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
   ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
   ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
   ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
@@ -180,14 +176,14 @@
   ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.5:
-  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
-  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
-  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]]
-  ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]]
-  ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]]
-  ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]]
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3
+  ; CHECK-NEXT: 
$vgpr0 = COPY [[COPY13]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY14]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY15]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY16]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -4251,7 +4251,7 @@ ; GFX6-LABEL: s_saddsat_i48: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_add_u32 s4, s0, s2 -; GFX6-NEXT: s_addc_u32 s5, s1, s3 +; GFX6-NEXT: s_addc_u32 s3, s1, s3 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x300000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x300000 @@ -4259,13 +4259,13 @@ ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x300000 ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 -; GFX6-NEXT: s_ashr_i32 s3, s7, 31 -; GFX6-NEXT: s_ashr_i32 s2, s7, 15 -; GFX6-NEXT: s_add_u32 s3, s3, 0xffff8000 -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: s_ashr_i32 s2, s7, 31 +; GFX6-NEXT: s_ashr_i32 s5, s7, 15 +; GFX6-NEXT: s_add_u32 s2, s2, 0xffff8000 +; GFX6-NEXT: v_mov_b32_e32 v0, s5 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s5 +; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: s_xor_b64 vcc, s[0:1], vcc ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4276,7 +4276,7 @@ ; GFX8-LABEL: s_saddsat_i48: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_add_u32 s4, s0, s2 -; GFX8-NEXT: s_addc_u32 s5, s1, s3 +; GFX8-NEXT: s_addc_u32 s3, s1, s3 ; GFX8-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x300000 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x300000 @@ -4284,13 +4284,13 @@ ; GFX8-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x300000 ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 -; GFX8-NEXT: s_ashr_i32 s3, s7, 31 -; GFX8-NEXT: s_ashr_i32 s2, s7, 15 -; GFX8-NEXT: s_add_u32 s3, s3, 0xffff8000 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_ashr_i32 s2, s7, 31 +; GFX8-NEXT: s_ashr_i32 s5, s7, 15 +; GFX8-NEXT: s_add_u32 s2, s2, 0xffff8000 +; GFX8-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 ; GFX8-NEXT: s_xor_b64 vcc, s[0:1], vcc ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4328,15 +4328,15 @@ ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 16 ; GFX10-NEXT: s_add_u32 s4, s0, s2 ; GFX10-NEXT: s_addc_u32 s5, s1, s3 -; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX10-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s2, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX10-NEXT: s_ashr_i32 s2, s5, 31 +; GFX10-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX10-NEXT: s_xor_b32 s0, s1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 
v0, s2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], 16, v[0:1] ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 @@ -4348,14 +4348,14 @@ ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 16 ; GFX11-NEXT: s_add_u32 s4, s0, s2 ; GFX11-NEXT: s_addc_u32 s5, s1, s3 -; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s2, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0 +; GFX11-NEXT: s_ashr_i32 s2, s5, 31 +; GFX11-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX11-NEXT: s_xor_b32 s0, s1, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX11-NEXT: v_ashrrev_i64 v[0:1], 16, v[0:1] ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 @@ -4377,8 +4377,7 @@ ; GFX6-NEXT: v_cmp_gt_i64_e64 s[2:3], 0, v[0:1] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v0, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffff8000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4397,8 +4396,7 @@ ; GFX8-NEXT: v_cmp_gt_i64_e64 s[2:3], 0, v[0:1] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffff8000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4415,8 +4413,7 @@ ; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], 0, v[0:1] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4475,8 +4472,7 @@ ; GFX6-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v0, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffff8000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[0:1], s[2:3] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4495,8 +4491,7 @@ ; GFX8-NEXT: v_cmp_lt_i64_e64 s[0:1], s[0:1], 0 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffff8000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[0:1], s[2:3] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4513,8 +4508,7 @@ ; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], v[2:3], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], 0 ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; 
GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4703,15 +4697,15 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_add_u32 s4, s0, s2 ; GFX10-NEXT: s_addc_u32 s5, s1, s3 -; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX10-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s2, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX10-NEXT: s_ashr_i32 s2, s5, 31 +; GFX10-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX10-NEXT: s_xor_b32 s0, s1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -4720,14 +4714,14 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_add_u32 s4, s0, s2 ; GFX11-NEXT: s_addc_u32 s5, s1, s3 -; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s2, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0 +; GFX11-NEXT: s_ashr_i32 s2, s5, 31 +; GFX11-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX11-NEXT: s_xor_b32 s0, s1, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -5120,26 +5114,26 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_add_u32 s8, s0, s4 ; GFX10-NEXT: s_addc_u32 s9, s1, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[4:5], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s10, s[8:9], s[0:1] -; GFX10-NEXT: s_ashr_i32 s0, s9, 31 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[0:1] +; GFX10-NEXT: v_cmp_lt_i64_e64 s1, s[4:5], 0 +; GFX10-NEXT: s_ashr_i32 s4, s9, 31 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 -; GFX10-NEXT: s_xor_b32 s8, s4, s10 -; GFX10-NEXT: s_add_u32 s4, s2, s6 -; GFX10-NEXT: s_addc_u32 s5, s3, s7 -; GFX10-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3] +; GFX10-NEXT: s_add_u32 s5, s4, 0x80000000 +; GFX10-NEXT: s_xor_b32 s8, s1, s0 +; GFX10-NEXT: s_add_u32 s0, s2, s6 +; GFX10-NEXT: s_addc_u32 s1, s3, s7 +; GFX10-NEXT: v_mov_b32_e32 v2, s0 +; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[0:1], s[2:3] ; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[6:7], 0 -; GFX10-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s8 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s8 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s3, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s1, s2 +; GFX10-NEXT: v_mov_b32_e32 v3, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s8 +; GFX10-NEXT: s_ashr_i32 s4, s1, 31 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s8 +; GFX10-NEXT: 
s_add_u32 s0, s4, 0x80000000 +; GFX10-NEXT: s_xor_b32 s1, s3, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s4, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s0, s1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2 @@ -5150,24 +5144,24 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_add_u32 s8, s0, s4 ; GFX11-NEXT: s_addc_u32 s9, s1, s5 -; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[4:5], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s10, s[8:9], s[0:1] -; GFX11-NEXT: s_ashr_i32 s0, s9, 31 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s8, s4, s10 -; GFX11-NEXT: s_add_u32 s4, s2, s6 -; GFX11-NEXT: s_addc_u32 s5, s3, s7 -; GFX11-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 -; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3] +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[0:1] +; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[4:5], 0 +; GFX11-NEXT: s_ashr_i32 s4, s9, 31 +; GFX11-NEXT: s_add_u32 s5, s4, 0x80000000 +; GFX11-NEXT: s_xor_b32 s8, s1, s0 +; GFX11-NEXT: s_add_u32 s0, s2, s6 +; GFX11-NEXT: s_addc_u32 s1, s3, s7 +; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[0:1], s[2:3] ; GFX11-NEXT: v_cmp_lt_i64_e64 s3, s[6:7], 0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s8 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s8 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s3, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s1, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s8 +; GFX11-NEXT: s_ashr_i32 s4, s1, 31 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s8 +; GFX11-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX11-NEXT: s_xor_b32 s1, s3, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s4, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s0, s1 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 @@ -5203,20 +5197,19 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v3, s4 -; GFX6-NEXT: v_mov_b32_e32 v4, s5 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_mov_b32_e32 v3, s5 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX6-NEXT: v_mov_b32_e32 v3, s1 ; GFX6-NEXT: v_mov_b32_e32 v4, s8 ; GFX6-NEXT: v_mov_b32_e32 v5, s9 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v2 +; GFX6-NEXT: v_readfirstlane_b32 s1, v2 +; GFX6-NEXT: v_readfirstlane_b32 s2, v1 ; GFX6-NEXT: v_readfirstlane_b32 s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -5251,21 +5244,19 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX8-NEXT: 
v_cndmask_b32_e32 v1, v4, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v4, s8 ; GFX8-NEXT: v_mov_b32_e32 v5, s9 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s1, v2 +; GFX8-NEXT: v_readfirstlane_b32 s2, v1 ; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -5300,66 +5291,102 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v4, s8 ; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s1, v2 +; GFX9-NEXT: v_readfirstlane_b32 s2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; -; GFX10PLUS-LABEL: s_saddsat_i128: -; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_add_u32 s4, s0, s4 -; GFX10PLUS-NEXT: s_addc_u32 s5, s1, s5 -; GFX10PLUS-NEXT: s_addc_u32 s8, s2, s6 -; GFX10PLUS-NEXT: v_cmp_lt_u64_e64 s0, s[4:5], s[0:1] -; GFX10PLUS-NEXT: s_addc_u32 s9, s3, s7 -; GFX10PLUS-NEXT: s_cmp_eq_u64 s[8:9], s[2:3] -; GFX10PLUS-NEXT: v_mov_b32_e32 v3, s9 -; GFX10PLUS-NEXT: s_cselect_b32 s10, 1, 0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX10PLUS-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[2:3] -; GFX10PLUS-NEXT: v_cmp_lt_i64_e64 s2, s[6:7], 0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10PLUS-NEXT: s_and_b32 s0, 1, s10 -; GFX10PLUS-NEXT: s_cmp_eq_u64 s[6:7], 0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 -; GFX10PLUS-NEXT: s_cselect_b32 s1, 1, 0 -; GFX10PLUS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10PLUS-NEXT: s_and_b32 s1, 1, s1 -; GFX10PLUS-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 -; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v2, s5 -; GFX10PLUS-NEXT: s_ashr_i32 s0, s9, 31 -; GFX10PLUS-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10PLUS-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s4 -; GFX10PLUS-NEXT: s_mov_b32 s3, s0 -; GFX10PLUS-NEXT: s_mov_b32 s2, s0 -; GFX10PLUS-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo -; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v1 -; GFX10PLUS-NEXT: 
v_readfirstlane_b32 s1, v2 -; GFX10PLUS-NEXT: v_readfirstlane_b32 s2, v0 -; GFX10PLUS-NEXT: v_readfirstlane_b32 s3, v3 -; GFX10PLUS-NEXT: ; return to shader part epilog +; GFX10-LABEL: s_saddsat_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_add_u32 s4, s0, s4 +; GFX10-NEXT: s_addc_u32 s5, s1, s5 +; GFX10-NEXT: s_addc_u32 s8, s2, s6 +; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[4:5], s[0:1] +; GFX10-NEXT: s_addc_u32 s9, s3, s7 +; GFX10-NEXT: s_cmp_eq_u64 s[8:9], s[2:3] +; GFX10-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-NEXT: s_cselect_b32 s10, 1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[2:3] +; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[6:7], 0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX10-NEXT: s_and_b32 s0, 1, s10 +; GFX10-NEXT: s_cmp_eq_u64 s[6:7], 0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 +; GFX10-NEXT: s_cselect_b32 s1, 1, 0 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX10-NEXT: s_and_b32 s1, 1, s1 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0 +; GFX10-NEXT: v_mov_b32_e32 v2, s5 +; GFX10-NEXT: s_ashr_i32 s0, s9, 31 +; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 +; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, s4 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_mov_b32_e32 v0, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 +; GFX10-NEXT: v_readfirstlane_b32 s1, v2 +; GFX10-NEXT: v_readfirstlane_b32 s2, v0 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_saddsat_i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_add_u32 s4, s0, s4 +; GFX11-NEXT: s_addc_u32 s5, s1, s5 +; GFX11-NEXT: s_addc_u32 s8, s2, s6 +; GFX11-NEXT: v_cmp_lt_u64_e64 s0, s[4:5], s[0:1] +; GFX11-NEXT: s_addc_u32 s9, s3, s7 +; GFX11-NEXT: s_cmp_eq_u64 s[8:9], s[2:3] +; GFX11-NEXT: v_mov_b32_e32 v3, s9 +; GFX11-NEXT: s_cselect_b32 s10, 1, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[2:3] +; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[6:7], 0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11-NEXT: s_and_b32 s0, 1, s10 +; GFX11-NEXT: s_cmp_eq_u64 s[6:7], 0 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 +; GFX11-NEXT: s_cselect_b32 s1, 1, 0 +; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX11-NEXT: s_and_b32 s1, 1, s1 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0 +; GFX11-NEXT: v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_ashr_i32 s0, s9, 31 +; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 +; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 +; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_mov_b32_e32 v0, s8 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v2 +; GFX11-NEXT: v_readfirstlane_b32 s2, v0 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 +; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.sadd.sat.i128(i128 
%lhs, i128 %rhs) ret i128 %result } @@ -5687,7 +5714,6 @@ ; GFX6-NEXT: v_addc_u32_e32 v16, vcc, v2, v10, vcc ; GFX6-NEXT: v_addc_u32_e32 v17, vcc, v3, v11, vcc ; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1] -; GFX6-NEXT: v_bfrev_b32_e32 v18, 1 ; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3] ; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5699,7 +5725,8 @@ ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v18 +; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v2, vcc @@ -5722,7 +5749,7 @@ ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX6-NEXT: v_xor_b32_e32 v4, v5, v4 -; GFX6-NEXT: v_add_i32_e32 v7, vcc, v6, v18 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, 0x80000000, v6 ; GFX6-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -5739,7 +5766,6 @@ ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, v2, v10, vcc ; GFX8-NEXT: v_addc_u32_e32 v17, vcc, v3, v11, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1] -; GFX8-NEXT: v_bfrev_b32_e32 v18, 1 ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3] ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5751,7 +5777,8 @@ ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v18 +; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v2, vcc @@ -5774,7 +5801,7 @@ ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX8-NEXT: v_xor_b32_e32 v4, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v6, v18 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x80000000, v6 ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -5791,7 +5818,6 @@ ; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, v2, v10, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v17, vcc, v3, v11, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1] -; GFX9-NEXT: v_bfrev_b32_e32 v18, 1 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5803,7 +5829,8 @@ ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v2, v18 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v2, v1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v2, vcc @@ -5826,7 +5853,7 @@ ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX9-NEXT: v_xor_b32_e32 v4, v5, v4 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v6, v18 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 0x80000000, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -5967,18 +5994,16 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s1, s0, 0x80000000 ; 
GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_mov_b32_e32 v4, s9 +; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: v_mov_b32_e32 v3, s9 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s16 ; GFX6-NEXT: v_mov_b32_e32 v3, s17 -; GFX6-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX6-NEXT: s_add_u32 s0, s4, s12 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_addc_u32 s1, s5, s13 @@ -6002,24 +6027,23 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX6-NEXT: v_mov_b32_e32 v1, s4 -; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s0 -; GFX6-NEXT: v_mov_b32_e32 v8, s1 +; GFX6-NEXT: v_mov_b32_e32 v2, s0 +; GFX6-NEXT: v_mov_b32_e32 v3, s1 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX6-NEXT: v_mov_b32_e32 v3, s5 ; GFX6-NEXT: v_mov_b32_e32 v8, s2 ; GFX6-NEXT: v_mov_b32_e32 v9, s3 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX6-NEXT: v_readfirstlane_b32 s0, v5 -; GFX6-NEXT: v_readfirstlane_b32 s1, v4 +; GFX6-NEXT: v_readfirstlane_b32 s0, v4 +; GFX6-NEXT: v_readfirstlane_b32 s1, v5 ; GFX6-NEXT: v_readfirstlane_b32 s2, v6 ; GFX6-NEXT: v_readfirstlane_b32 s3, v7 ; GFX6-NEXT: v_readfirstlane_b32 s4, v0 -; GFX6-NEXT: v_readfirstlane_b32 s5, v1 -; GFX6-NEXT: v_readfirstlane_b32 s6, v2 +; GFX6-NEXT: v_readfirstlane_b32 s5, v2 +; GFX6-NEXT: v_readfirstlane_b32 s6, v1 ; GFX6-NEXT: v_readfirstlane_b32 s7, v3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -6054,19 +6078,17 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s16 ; GFX8-NEXT: v_mov_b32_e32 v3, s17 ; GFX8-NEXT: s_add_u32 s0, s4, s12 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX8-NEXT: s_addc_u32 s1, s5, s13 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_addc_u32 s2, s6, s14 @@ -6095,25 +6117,23 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s0 -; GFX8-NEXT: v_mov_b32_e32 v8, s1 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; 
GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: v_mov_b32_e32 v8, s2 ; GFX8-NEXT: v_mov_b32_e32 v9, s3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX8-NEXT: v_readfirstlane_b32 s0, v5 -; GFX8-NEXT: v_readfirstlane_b32 s1, v4 +; GFX8-NEXT: v_readfirstlane_b32 s0, v4 +; GFX8-NEXT: v_readfirstlane_b32 s1, v5 ; GFX8-NEXT: v_readfirstlane_b32 s2, v6 ; GFX8-NEXT: v_readfirstlane_b32 s3, v7 ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: v_readfirstlane_b32 s5, v1 -; GFX8-NEXT: v_readfirstlane_b32 s6, v2 +; GFX8-NEXT: v_readfirstlane_b32 s5, v2 +; GFX8-NEXT: v_readfirstlane_b32 s6, v1 ; GFX8-NEXT: v_readfirstlane_b32 s7, v3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -6148,19 +6168,17 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-NEXT: v_mov_b32_e32 v3, s9 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 ; GFX9-NEXT: v_mov_b32_e32 v3, s17 ; GFX9-NEXT: s_add_u32 s0, s4, s12 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX9-NEXT: s_addc_u32 s1, s5, s13 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: s_addc_u32 s2, s6, s14 @@ -6189,25 +6207,23 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NEXT: v_mov_b32_e32 v8, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: v_mov_b32_e32 v8, s2 ; GFX9-NEXT: v_mov_b32_e32 v9, s3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX9-NEXT: v_readfirstlane_b32 s0, v5 -; GFX9-NEXT: v_readfirstlane_b32 s1, v4 +; GFX9-NEXT: v_readfirstlane_b32 s0, v4 +; GFX9-NEXT: v_readfirstlane_b32 s1, v5 ; GFX9-NEXT: v_readfirstlane_b32 s2, v6 ; GFX9-NEXT: v_readfirstlane_b32 s3, v7 ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 -; GFX9-NEXT: v_readfirstlane_b32 s6, v2 +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 +; GFX9-NEXT: v_readfirstlane_b32 s6, v1 ; GFX9-NEXT: v_readfirstlane_b32 s7, v3 ; GFX9-NEXT: ; return to shader part epilog ; @@ -6218,6 +6234,7 @@ ; GFX10-NEXT: s_addc_u32 s16, s2, s10 ; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[8:9], s[0:1] ; GFX10-NEXT: s_addc_u32 s17, s3, s11 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 ; 
GFX10-NEXT: s_cmp_eq_u64 s[16:17], s[2:3] ; GFX10-NEXT: s_cselect_b32 s18, 1, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 @@ -6230,66 +6247,61 @@ ; GFX10-NEXT: s_cselect_b32 s1, 1, 0 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX10-NEXT: s_and_b32 s1, 1, s1 +; GFX10-NEXT: s_ashr_i32 s10, s17, 31 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 +; GFX10-NEXT: s_add_u32 s11, s10, 0x80000000 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0 -; GFX10-NEXT: s_ashr_i32 s0, s17, 31 -; GFX10-NEXT: v_mov_b32_e32 v2, s9 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_add_u32 s10, s4, s12 -; GFX10-NEXT: s_addc_u32 s11, s5, s13 -; GFX10-NEXT: s_addc_u32 s12, s6, s14 -; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[10:11], s[4:5] -; GFX10-NEXT: s_addc_u32 s13, s7, s15 +; GFX10-NEXT: s_add_u32 s0, s4, s12 +; GFX10-NEXT: s_addc_u32 s1, s5, s13 +; GFX10-NEXT: s_addc_u32 s2, s6, s14 +; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5] +; GFX10-NEXT: s_addc_u32 s3, s7, s15 ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX10-NEXT: s_cmp_eq_u64 s[12:13], s[6:7] -; GFX10-NEXT: v_mov_b32_e32 v1, s8 -; GFX10-NEXT: s_cselect_b32 s8, 1, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 -; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[12:13], s[6:7] +; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7] +; GFX10-NEXT: v_mov_b32_e32 v5, s0 +; GFX10-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX10-NEXT: s_mov_b32 s3, s0 -; GFX10-NEXT: s_mov_b32 s2, s0 -; GFX10-NEXT: v_mov_b32_e32 v6, s11 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 -; GFX10-NEXT: s_and_b32 s4, 1, s8 +; GFX10-NEXT: v_mov_b32_e32 v6, s1 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10-NEXT: s_and_b32 s4, 1, s12 ; GFX10-NEXT: s_cmp_eq_u64 s[14:15], 0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6 ; GFX10-NEXT: s_cselect_b32 s5, 1, 0 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: s_and_b32 s5, 1, s5 -; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s16 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, 0, s4 -; GFX10-NEXT: v_mov_b32_e32 v5, s17 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX10-NEXT: v_xor_b32_e32 v3, v4, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, s1, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v5, s10 -; GFX10-NEXT: s_ashr_i32 s0, s13, 31 -; GFX10-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_mov_b32 s3, s0 -; GFX10-NEXT: s_mov_b32 s2, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX10-NEXT: v_mov_b32_e32 v3, s12 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s1, vcc_lo -; GFX10-NEXT: v_readfirstlane_b32 s0, v1 -; GFX10-NEXT: v_readfirstlane_b32 s1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4 +; GFX10-NEXT: v_mov_b32_e32 v3, s8 +; GFX10-NEXT: s_ashr_i32 s4, s3, 31 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 
v0, v0, s10, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, s17 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo +; GFX10-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX10-NEXT: v_readfirstlane_b32 s1, v4 +; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 -; GFX10-NEXT: v_readfirstlane_b32 s3, v4 +; GFX10-NEXT: v_readfirstlane_b32 s3, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +; GFX10-NEXT: v_readfirstlane_b32 s0, v3 ; GFX10-NEXT: v_readfirstlane_b32 s4, v5 ; GFX10-NEXT: v_readfirstlane_b32 s5, v6 -; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s6, v1 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7 ; GFX10-NEXT: ; return to shader part epilog ; @@ -6312,65 +6324,60 @@ ; GFX11-NEXT: s_cselect_b32 s1, 1, 0 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX11-NEXT: s_and_b32 s1, 1, s1 +; GFX11-NEXT: s_ashr_i32 s10, s17, 31 ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 +; GFX11-NEXT: s_add_u32 s11, s10, 0x80000000 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0 -; GFX11-NEXT: s_ashr_i32 s0, s17, 31 -; GFX11-NEXT: v_mov_b32_e32 v2, s9 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_add_u32 s10, s4, s12 -; GFX11-NEXT: s_addc_u32 s11, s5, s13 -; GFX11-NEXT: s_addc_u32 s12, s6, s14 -; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[10:11], s[4:5] -; GFX11-NEXT: s_addc_u32 s13, s7, s15 +; GFX11-NEXT: s_add_u32 s0, s4, s12 +; GFX11-NEXT: s_addc_u32 s1, s5, s13 +; GFX11-NEXT: s_addc_u32 s2, s6, s14 +; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5] +; GFX11-NEXT: s_addc_u32 s3, s7, s15 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX11-NEXT: s_cmp_eq_u64 s[12:13], s[6:7] -; GFX11-NEXT: v_mov_b32_e32 v1, s8 -; GFX11-NEXT: s_cselect_b32 s8, 1, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 -; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[12:13], s[6:7] +; GFX11-NEXT: s_cmp_eq_u64 s[2:3], s[6:7] +; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] +; GFX11-NEXT: s_cselect_b32 s12, 1, 0 ; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: v_dual_mov_b32 v7, s13 :: v_dual_mov_b32 v6, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 -; GFX11-NEXT: s_and_b32 s4, 1, s8 +; GFX11-NEXT: v_dual_mov_b32 v5, s0 :: v_dual_and_b32 v0, 1, v0 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX11-NEXT: s_and_b32 s4, 1, s12 ; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0 -; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6 ; GFX11-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX11-NEXT: s_and_b32 s5, 1, s5 -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s5 -; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v5, 0, s4 -; GFX11-NEXT: v_mov_b32_e32 v5, s17 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_xor_b32_e32 
v3, v4, v3 ; GFX11-NEXT: v_mov_b32_e32 v0, s16 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v4, v5, s1, vcc_lo -; GFX11-NEXT: v_mov_b32_e32 v5, s10 -; GFX11-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX11-NEXT: s_ashr_i32 s0, s13, 31 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX11-NEXT: v_mov_b32_e32 v3, s12 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s1, vcc_lo -; GFX11-NEXT: v_readfirstlane_b32 s0, v1 -; GFX11-NEXT: v_readfirstlane_b32 s1, v2 +; GFX11-NEXT: s_ashr_i32 s4, s3, 31 +; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1 +; GFX11-NEXT: v_mov_b32_e32 v4, s9 +; GFX11-NEXT: v_mov_b32_e32 v2, s17 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo +; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo +; GFX11-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v4 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 -; GFX11-NEXT: v_readfirstlane_b32 s3, v4 +; GFX11-NEXT: v_readfirstlane_b32 s3, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +; GFX11-NEXT: v_readfirstlane_b32 s0, v3 ; GFX11-NEXT: v_readfirstlane_b32 s4, v5 ; GFX11-NEXT: v_readfirstlane_b32 s5, v6 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 +; GFX11-NEXT: v_readfirstlane_b32 s6, v1 ; GFX11-NEXT: v_readfirstlane_b32 s7, v7 ; GFX11-NEXT: ; return to shader part epilog %result = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %lhs, <2 x i128> %rhs) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -1435,13 +1435,12 @@ ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] +; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v2 +; GFX9-NEXT: v_lshl_or_b32 v1, v2, 10, v1 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] -; GFX9-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i65_22: @@ -1455,9 +1454,8 @@ ; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v4, 10, v2 +; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] -; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i65 %value, 22 %ashr = ashr i65 %shl, 22 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ 
-1070,28 +1070,28 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v2, v[1:2] -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] ; CHECK-NEXT: v_mov_b32_e32 v3, 0x1000 ; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 -; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 +; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v9, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v5, s6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v4, v5, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v6 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc @@ -1718,28 +1718,28 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v2, v[1:2] -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] ; CHECK-NEXT: v_mov_b32_e32 v3, 0x12d8fb ; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 -; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 +; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v9, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v5, s6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 
v4, v5, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v6 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -4237,7 +4237,7 @@ ; GFX6-LABEL: s_ssubsat_i48: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_sub_u32 s4, s0, s2 -; GFX6-NEXT: s_subb_u32 s5, s1, s3 +; GFX6-NEXT: s_subb_u32 s3, s1, s3 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x300000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x300000 @@ -4245,13 +4245,13 @@ ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x300000 ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], 0 -; GFX6-NEXT: s_ashr_i32 s3, s7, 31 -; GFX6-NEXT: s_ashr_i32 s2, s7, 15 -; GFX6-NEXT: s_add_u32 s3, s3, 0xffff8000 -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: s_ashr_i32 s2, s7, 31 +; GFX6-NEXT: s_ashr_i32 s5, s7, 15 +; GFX6-NEXT: s_add_u32 s2, s2, 0xffff8000 +; GFX6-NEXT: v_mov_b32_e32 v0, s5 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s5 +; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: s_xor_b64 vcc, s[0:1], vcc ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4262,7 +4262,7 @@ ; GFX8-LABEL: s_ssubsat_i48: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_sub_u32 s4, s0, s2 -; GFX8-NEXT: s_subb_u32 s5, s1, s3 +; GFX8-NEXT: s_subb_u32 s3, s1, s3 ; GFX8-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x300000 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x300000 @@ -4270,13 +4270,13 @@ ; GFX8-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x300000 ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], 0 -; GFX8-NEXT: s_ashr_i32 s3, s7, 31 -; GFX8-NEXT: s_ashr_i32 s2, s7, 15 -; GFX8-NEXT: s_add_u32 s3, s3, 0xffff8000 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_ashr_i32 s2, s7, 31 +; GFX8-NEXT: s_ashr_i32 s5, s7, 15 +; GFX8-NEXT: s_add_u32 s2, s2, 0xffff8000 +; GFX8-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 ; GFX8-NEXT: s_xor_b64 vcc, s[0:1], vcc ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4314,15 +4314,15 @@ ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 16 ; GFX10-NEXT: s_sub_u32 s4, s0, s2 ; GFX10-NEXT: s_subb_u32 s5, s1, s3 -; GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[2:3], 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s2, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX10-NEXT: s_ashr_i32 s2, s5, 31 +; GFX10-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX10-NEXT: s_xor_b32 s0, s1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX10-NEXT: v_ashrrev_i64 v[0:1], 16, v[0:1] ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 @@ -4334,14 +4334,14 @@ ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 16 ; GFX11-NEXT: s_sub_u32 s4, s0, s2 ; GFX11-NEXT: s_subb_u32 s5, s1, s3 -; GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s2, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX11-NEXT: v_cmp_gt_i64_e64 s1, s[2:3], 0 +; GFX11-NEXT: s_ashr_i32 s2, s5, 31 +; GFX11-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX11-NEXT: s_xor_b32 s0, s1, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX11-NEXT: v_ashrrev_i64 v[0:1], 16, v[0:1] ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 @@ -4363,8 +4363,7 @@ ; GFX6-NEXT: v_cmp_lt_i64_e64 s[2:3], 0, v[0:1] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v0, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffff8000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4383,8 +4382,7 @@ ; GFX8-NEXT: v_cmp_lt_i64_e64 s[2:3], 0, v[0:1] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffff8000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4401,8 +4399,7 @@ ; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], 0, v[0:1] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4461,8 +4458,7 @@ ; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], 0 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v0, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffff8000, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[0:1], s[2:3] ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4481,8 +4477,7 @@ ; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[0:1], 0 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 15, v3 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff8000 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffff8000, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[0:1], s[2:3] ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -4499,8 +4494,7 @@ ; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], v[2:3], 
v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], 0 ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 -; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v0, v1 +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, 0x80000000, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[2:3], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc @@ -4689,15 +4683,15 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_sub_u32 s4, s0, s2 ; GFX10-NEXT: s_subb_u32 s5, s1, s3 -; GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[2:3], 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s2, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX10-NEXT: s_ashr_i32 s2, s5, 31 +; GFX10-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX10-NEXT: s_xor_b32 s0, s1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -4706,14 +4700,14 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_sub_u32 s4, s0, s2 ; GFX11-NEXT: s_subb_u32 s5, s1, s3 -; GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[4:5], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s2, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s2 +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] +; GFX11-NEXT: v_cmp_gt_i64_e64 s1, s[2:3], 0 +; GFX11-NEXT: s_ashr_i32 s2, s5, 31 +; GFX11-NEXT: s_add_u32 s3, s2, 0x80000000 +; GFX11-NEXT: s_xor_b32 s0, s1, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -5106,26 +5100,26 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_sub_u32 s8, s0, s4 ; GFX10-NEXT: s_subb_u32 s9, s1, s5 -; GFX10-NEXT: v_cmp_gt_i64_e64 s4, s[4:5], 0 -; GFX10-NEXT: v_cmp_lt_i64_e64 s10, s[8:9], s[0:1] -; GFX10-NEXT: s_ashr_i32 s0, s9, 31 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 +; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[0:1] +; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[4:5], 0 +; GFX10-NEXT: s_ashr_i32 s4, s9, 31 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 -; GFX10-NEXT: s_xor_b32 s8, s4, s10 -; GFX10-NEXT: s_sub_u32 s4, s2, s6 -; GFX10-NEXT: s_subb_u32 s5, s3, s7 -; GFX10-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3] +; GFX10-NEXT: s_add_u32 s5, s4, 0x80000000 +; GFX10-NEXT: s_xor_b32 s8, s1, s0 +; GFX10-NEXT: s_sub_u32 s0, s2, s6 +; GFX10-NEXT: s_subb_u32 s1, s3, s7 +; GFX10-NEXT: v_mov_b32_e32 v2, s0 +; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[0:1], s[2:3] ; GFX10-NEXT: v_cmp_gt_i64_e64 s3, s[6:7], 0 -; GFX10-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s8 -; GFX10-NEXT: s_ashr_i32 s0, s5, 31 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s8 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_xor_b32 s2, s3, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s1, s2 +; 
GFX10-NEXT: v_mov_b32_e32 v3, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s8 +; GFX10-NEXT: s_ashr_i32 s4, s1, 31 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s8 +; GFX10-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX10-NEXT: s_xor_b32 s1, s3, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s4, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s0, s1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: v_readfirstlane_b32 s2, v2 @@ -5136,24 +5130,24 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_sub_u32 s8, s0, s4 ; GFX11-NEXT: s_subb_u32 s9, s1, s5 -; GFX11-NEXT: v_cmp_gt_i64_e64 s4, s[4:5], 0 -; GFX11-NEXT: v_cmp_lt_i64_e64 s10, s[8:9], s[0:1] -; GFX11-NEXT: s_ashr_i32 s0, s9, 31 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s8, s4, s10 -; GFX11-NEXT: s_sub_u32 s4, s2, s6 -; GFX11-NEXT: s_subb_u32 s5, s3, s7 -; GFX11-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 -; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3] +; GFX11-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[0:1] +; GFX11-NEXT: v_cmp_gt_i64_e64 s1, s[4:5], 0 +; GFX11-NEXT: s_ashr_i32 s4, s9, 31 +; GFX11-NEXT: s_add_u32 s5, s4, 0x80000000 +; GFX11-NEXT: s_xor_b32 s8, s1, s0 +; GFX11-NEXT: s_sub_u32 s0, s2, s6 +; GFX11-NEXT: s_subb_u32 s1, s3, s7 +; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[0:1], s[2:3] ; GFX11-NEXT: v_cmp_gt_i64_e64 s3, s[6:7], 0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s8 -; GFX11-NEXT: s_ashr_i32 s0, s5, 31 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s1, s8 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_xor_b32 s2, s3, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s1, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s8 +; GFX11-NEXT: s_ashr_i32 s4, s1, 31 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s8 +; GFX11-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX11-NEXT: s_xor_b32 s1, s3, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s4, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s0, s1 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 @@ -5191,20 +5185,19 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_mov_b32_e32 v4, s9 +; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: v_mov_b32_e32 v3, s9 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX6-NEXT: v_mov_b32_e32 v3, s1 ; GFX6-NEXT: v_mov_b32_e32 v4, s10 ; GFX6-NEXT: v_mov_b32_e32 v5, s11 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v2 +; GFX6-NEXT: v_readfirstlane_b32 s1, v2 +; GFX6-NEXT: v_readfirstlane_b32 s2, v1 ; GFX6-NEXT: v_readfirstlane_b32 s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -5241,21 +5234,19 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; 
GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v4, s10 ; GFX8-NEXT: v_mov_b32_e32 v5, s11 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 -; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s1, v2 +; GFX8-NEXT: v_readfirstlane_b32 s2, v1 ; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -5292,21 +5283,19 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-NEXT: v_mov_b32_e32 v3, s9 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v4, s10 ; GFX9-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s1, v2 +; GFX9-NEXT: v_readfirstlane_b32 s2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; @@ -5331,12 +5320,10 @@ ; GFX10-NEXT: s_cselect_b32 s1, 1, 0 ; GFX10-NEXT: s_ashr_i32 s0, s11, 31 ; GFX10-NEXT: s_and_b32 s1, 1, s1 -; GFX10-NEXT: s_mov_b32 s3, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 ; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v2, s9 ; GFX10-NEXT: v_mov_b32_e32 v3, s11 @@ -5346,8 +5333,8 @@ ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s10 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: v_readfirstlane_b32 s1, v2 @@ -5376,12 +5363,10 @@ ; GFX11-NEXT: s_cselect_b32 s1, 1, 0 ; GFX11-NEXT: s_ashr_i32 s0, s11, 31 ; GFX11-NEXT: s_and_b32 s1, 1, s1 -; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 ; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v3, v2 :: v_dual_mov_b32 v2, s9 ; GFX11-NEXT: v_mov_b32_e32 v3, s11 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 @@ -5389,8 +5374,8 @@ ; GFX11-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, s10 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v1 ; GFX11-NEXT: v_readfirstlane_b32 s1, v2 @@ -5744,7 +5729,6 @@ ; GFX6-NEXT: v_subb_u32_e32 v18, vcc, v2, v10, vcc ; GFX6-NEXT: v_subb_u32_e32 v19, vcc, v3, v11, vcc ; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] -; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] ; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5758,7 +5742,8 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v19 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v20 +; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc @@ -5783,7 +5768,7 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v11 -; GFX6-NEXT: v_add_i32_e32 v7, vcc, v6, v20 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, 0x80000000, v6 ; GFX6-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -5800,7 +5785,6 @@ ; GFX8-NEXT: v_subb_u32_e32 v18, vcc, v2, v10, vcc ; GFX8-NEXT: v_subb_u32_e32 v19, vcc, v3, v11, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] -; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5814,7 +5798,8 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v19 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v20 +; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc @@ -5839,7 +5824,7 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v11 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v6, v20 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x80000000, v6 ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -5856,7 +5841,6 @@ ; GFX9-NEXT: v_subb_co_u32_e32 v18, vcc, v2, v10, vcc ; GFX9-NEXT: v_subb_co_u32_e32 v19, vcc, v3, v11, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] -; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc @@ -5870,7 +5854,8 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v19 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v2, v20 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v2, v1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc @@ -5895,7 +5880,7 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; 
GFX9-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v11 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v6, v20 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 0x80000000, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc @@ -6046,18 +6031,16 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v3, s16 -; GFX6-NEXT: v_mov_b32_e32 v4, s17 +; GFX6-NEXT: v_mov_b32_e32 v2, s16 +; GFX6-NEXT: v_mov_b32_e32 v3, s17 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s18 ; GFX6-NEXT: v_mov_b32_e32 v3, s19 -; GFX6-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX6-NEXT: s_sub_u32 s0, s4, s12 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_subb_u32 s1, s5, s13 @@ -6083,24 +6066,23 @@ ; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX6-NEXT: v_mov_b32_e32 v1, s4 -; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s0 -; GFX6-NEXT: v_mov_b32_e32 v8, s1 +; GFX6-NEXT: v_mov_b32_e32 v2, s0 +; GFX6-NEXT: v_mov_b32_e32 v3, s1 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX6-NEXT: v_mov_b32_e32 v3, s5 ; GFX6-NEXT: v_mov_b32_e32 v8, s2 ; GFX6-NEXT: v_mov_b32_e32 v9, s3 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX6-NEXT: v_readfirstlane_b32 s0, v5 -; GFX6-NEXT: v_readfirstlane_b32 s1, v4 +; GFX6-NEXT: v_readfirstlane_b32 s0, v4 +; GFX6-NEXT: v_readfirstlane_b32 s1, v5 ; GFX6-NEXT: v_readfirstlane_b32 s2, v6 ; GFX6-NEXT: v_readfirstlane_b32 s3, v7 ; GFX6-NEXT: v_readfirstlane_b32 s4, v0 -; GFX6-NEXT: v_readfirstlane_b32 s5, v1 -; GFX6-NEXT: v_readfirstlane_b32 s6, v2 +; GFX6-NEXT: v_readfirstlane_b32 s5, v2 +; GFX6-NEXT: v_readfirstlane_b32 s6, v1 ; GFX6-NEXT: v_readfirstlane_b32 s7, v3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -6137,19 +6119,17 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s16 -; GFX8-NEXT: v_mov_b32_e32 v4, s17 +; GFX8-NEXT: v_mov_b32_e32 v2, s16 +; GFX8-NEXT: v_mov_b32_e32 v3, s17 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s18 ; GFX8-NEXT: v_mov_b32_e32 v3, s19 ; GFX8-NEXT: s_sub_u32 s0, s4, s12 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX8-NEXT: s_subb_u32 s1, s5, s13 ; GFX8-NEXT: 
v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_subb_u32 s2, s6, s14 @@ -6180,25 +6160,23 @@ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s0 -; GFX8-NEXT: v_mov_b32_e32 v8, s1 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: v_mov_b32_e32 v8, s2 ; GFX8-NEXT: v_mov_b32_e32 v9, s3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX8-NEXT: v_readfirstlane_b32 s0, v5 -; GFX8-NEXT: v_readfirstlane_b32 s1, v4 +; GFX8-NEXT: v_readfirstlane_b32 s0, v4 +; GFX8-NEXT: v_readfirstlane_b32 s1, v5 ; GFX8-NEXT: v_readfirstlane_b32 s2, v6 ; GFX8-NEXT: v_readfirstlane_b32 s3, v7 ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: v_readfirstlane_b32 s5, v1 -; GFX8-NEXT: v_readfirstlane_b32 s6, v2 +; GFX8-NEXT: v_readfirstlane_b32 s5, v2 +; GFX8-NEXT: v_readfirstlane_b32 s6, v1 ; GFX8-NEXT: v_readfirstlane_b32 s7, v3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -6235,19 +6213,17 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s1, s0, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s16 -; GFX9-NEXT: v_mov_b32_e32 v4, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s16 +; GFX9-NEXT: v_mov_b32_e32 v3, s17 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s18 ; GFX9-NEXT: v_mov_b32_e32 v3, s19 ; GFX9-NEXT: s_sub_u32 s0, s4, s12 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc ; GFX9-NEXT: s_subb_u32 s1, s5, s13 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: s_subb_u32 s2, s6, s14 @@ -6278,25 +6254,23 @@ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_add_u32 s5, s4, 0x80000000 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NEXT: v_mov_b32_e32 v8, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: v_mov_b32_e32 v8, s2 ; GFX9-NEXT: v_mov_b32_e32 v9, s3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; GFX9-NEXT: v_readfirstlane_b32 s0, v5 -; GFX9-NEXT: v_readfirstlane_b32 s1, v4 +; GFX9-NEXT: v_readfirstlane_b32 s0, v4 +; GFX9-NEXT: v_readfirstlane_b32 s1, v5 ; GFX9-NEXT: v_readfirstlane_b32 s2, v6 ; GFX9-NEXT: v_readfirstlane_b32 s3, v7 ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: 
v_readfirstlane_b32 s5, v1 -; GFX9-NEXT: v_readfirstlane_b32 s6, v2 +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 +; GFX9-NEXT: v_readfirstlane_b32 s6, v1 ; GFX9-NEXT: v_readfirstlane_b32 s7, v3 ; GFX9-NEXT: ; return to shader part epilog ; @@ -6315,74 +6289,70 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; GFX10-NEXT: s_and_b32 s0, 1, s20 ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 -; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10-NEXT: s_cselect_b32 s1, 1, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 ; GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[10:11], 0 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX10-NEXT: s_cselect_b32 s1, 1, 0 +; GFX10-NEXT: s_ashr_i32 s8, s19, 31 ; GFX10-NEXT: s_and_b32 s1, 1, s1 -; GFX10-NEXT: s_ashr_i32 s0, s19, 31 +; GFX10-NEXT: s_add_u32 s9, s8, 0x80000000 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: s_sub_u32 s8, s4, s12 -; GFX10-NEXT: s_subb_u32 s9, s5, s13 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 -; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[8:9], s[4:5] -; GFX10-NEXT: s_subb_u32 s10, s6, s14 -; GFX10-NEXT: s_subb_u32 s11, s7, s15 -; GFX10-NEXT: s_mov_b32 s3, s0 +; GFX10-NEXT: s_sub_u32 s0, s4, s12 +; GFX10-NEXT: s_subb_u32 s1, s5, s13 +; GFX10-NEXT: s_subb_u32 s2, s6, s14 +; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5] ; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo -; GFX10-NEXT: s_cmp_eq_u64 s[10:11], s[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 -; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[10:11], s[6:7] -; GFX10-NEXT: v_cmp_gt_u64_e64 s6, s[12:13], 0 +; GFX10-NEXT: s_subb_u32 s3, s7, s15 +; GFX10-NEXT: v_mov_b32_e32 v5, s0 +; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7] +; GFX10-NEXT: v_mov_b32_e32 v6, s1 ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX10-NEXT: v_mov_b32_e32 v1, s16 -; GFX10-NEXT: s_cselect_b32 s16, 1, 0 -; GFX10-NEXT: v_mov_b32_e32 v2, s17 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 -; GFX10-NEXT: s_and_b32 s4, 1, s16 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] +; GFX10-NEXT: v_cmp_gt_u64_e64 s6, s[12:13], 0 +; GFX10-NEXT: s_cselect_b32 s10, 1, 0 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10-NEXT: s_and_b32 s4, 1, s10 ; GFX10-NEXT: s_cmp_eq_u64 s[14:15], 0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6 ; GFX10-NEXT: v_cmp_gt_i64_e64 s6, s[14:15], 0 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: s_cselect_b32 s5, 1, 0 -; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: s_ashr_i32 s4, s3, 31 ; GFX10-NEXT: s_and_b32 s5, 1, s5 -; GFX10-NEXT: s_mov_b32 s2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo +; GFX10-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 -; GFX10-NEXT: v_mov_b32_e32 v7, s11 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, s16 +; GFX10-NEXT: v_mov_b32_e32 v4, s17 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s18 -; GFX10-NEXT: v_mov_b32_e32 v5, s19 -; GFX10-NEXT: v_mov_b32_e32 v6, s9 -; GFX10-NEXT: v_xor_b32_e32 v3, v4, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; 
GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, s1, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_mov_b32_e32 v5, s8 -; GFX10-NEXT: s_ashr_i32 s0, s11, 31 -; GFX10-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX10-NEXT: v_mov_b32_e32 v3, s10 -; GFX10-NEXT: s_mov_b32 s3, s0 -; GFX10-NEXT: s_mov_b32 s2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s1, vcc_lo -; GFX10-NEXT: v_readfirstlane_b32 s0, v1 -; GFX10-NEXT: v_readfirstlane_b32 s1, v2 +; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, s19 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s8, vcc_lo +; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo +; GFX10-NEXT: v_readfirstlane_b32 s1, v4 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 -; GFX10-NEXT: v_readfirstlane_b32 s3, v4 +; GFX10-NEXT: v_readfirstlane_b32 s3, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +; GFX10-NEXT: v_readfirstlane_b32 s0, v3 ; GFX10-NEXT: v_readfirstlane_b32 s4, v5 ; GFX10-NEXT: v_readfirstlane_b32 s5, v6 -; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s6, v1 ; GFX10-NEXT: v_readfirstlane_b32 s7, v7 ; GFX10-NEXT: ; return to shader part epilog ; @@ -6405,69 +6375,64 @@ ; GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[10:11], 0 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX11-NEXT: s_cselect_b32 s1, 1, 0 -; GFX11-NEXT: s_ashr_i32 s0, s19, 31 +; GFX11-NEXT: s_ashr_i32 s8, s19, 31 ; GFX11-NEXT: s_and_b32 s1, 1, s1 -; GFX11-NEXT: s_mov_b32 s3, s0 +; GFX11-NEXT: s_add_u32 s9, s8, 0x80000000 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_sub_u32 s8, s4, s12 -; GFX11-NEXT: s_subb_u32 s9, s5, s13 -; GFX11-NEXT: s_subb_u32 s10, s6, s14 +; GFX11-NEXT: s_sub_u32 s0, s4, s12 +; GFX11-NEXT: s_subb_u32 s1, s5, s13 +; GFX11-NEXT: s_subb_u32 s2, s6, s14 +; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5] ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo -; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[8:9], s[4:5] -; GFX11-NEXT: s_subb_u32 s11, s7, s15 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: s_cmp_eq_u64 s[10:11], s[6:7] +; GFX11-NEXT: s_subb_u32 s3, s7, s15 +; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_cmp_eq_u64 s[2:3], s[6:7] ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 -; GFX11-NEXT: v_mov_b32_e32 v1, s16 -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 -; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[10:11], s[6:7] +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] ; GFX11-NEXT: v_cmp_gt_u64_e64 s6, s[12:13], 0 -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_cselect_b32 s16, 1, 0 -; GFX11-NEXT: v_mov_b32_e32 v7, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 -; GFX11-NEXT: s_and_b32 s4, 1, s16 +; GFX11-NEXT: s_cselect_b32 s10, 1, 0 +; 
GFX11-NEXT: v_mov_b32_e32 v5, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX11-NEXT: s_and_b32 s4, 1, s10 ; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0 -; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6 ; GFX11-NEXT: v_cmp_gt_i64_e64 s6, s[14:15], 0 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX11-NEXT: s_cselect_b32 s5, 1, 0 -; GFX11-NEXT: v_mov_b32_e32 v2, s17 +; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_and_b32 s5, 1, s5 -; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 +; GFX11-NEXT: s_ashr_i32 s4, s3, 31 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 -; GFX11-NEXT: v_dual_cndmask_b32 v4, v6, v5 :: v_dual_mov_b32 v5, s19 +; GFX11-NEXT: s_add_u32 s0, s4, 0x80000000 +; GFX11-NEXT: v_dual_cndmask_b32 v2, v4, v3 :: v_dual_mov_b32 v3, s16 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NEXT: v_mov_b32_e32 v6, s9 -; GFX11-NEXT: v_xor_b32_e32 v3, v4, v3 ; GFX11-NEXT: v_mov_b32_e32 v0, s18 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v4, v5, s1, vcc_lo -; GFX11-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX11-NEXT: v_mov_b32_e32 v5, s8 -; GFX11-NEXT: s_ashr_i32 s0, s11, 31 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX11-NEXT: v_mov_b32_e32 v3, s10 -; GFX11-NEXT: s_add_u32 s1, s0, 0x80000000 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s3, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s1, vcc_lo -; GFX11-NEXT: v_readfirstlane_b32 s0, v1 -; GFX11-NEXT: v_readfirstlane_b32 s1, v2 +; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1 +; GFX11-NEXT: v_mov_b32_e32 v4, s17 +; GFX11-NEXT: v_mov_b32_e32 v2, s19 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s8, vcc_lo +; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v4 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 -; GFX11-NEXT: v_readfirstlane_b32 s3, v4 +; GFX11-NEXT: v_readfirstlane_b32 s3, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +; GFX11-NEXT: v_readfirstlane_b32 s0, v3 ; GFX11-NEXT: v_readfirstlane_b32 s4, v5 ; GFX11-NEXT: v_readfirstlane_b32 s5, v6 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 +; GFX11-NEXT: v_readfirstlane_b32 s6, v1 ; GFX11-NEXT: v_readfirstlane_b32 s7, v7 ; GFX11-NEXT: ; return to shader part epilog %result = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %lhs, <2 x i128> %rhs) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1065,25 +1065,26 @@ ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_mul_lo_u32 v5, v5, s4 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 -; CHECK-NEXT: v_subb_u32_e64 v5, 
s[4:5], v1, v3, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[4:5] -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v7 +; CHECK-NEXT: v_subb_u32_e64 v5, vcc, v1, v3, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], vcc +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v6 +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc @@ -1290,48 +1291,49 @@ ; GISEL-NEXT: v_mul_lo_u32 v9, v9, s4 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v14 -; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v3, v6, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v2, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v11, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v13, v4 -; GISEL-NEXT: 
v_subbrev_u32_e32 v14, vcc, 0, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 +; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v1, v7, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v14 +; GISEL-NEXT: v_subb_u32_e64 v9, vcc, v3, v6, s[6:7] +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v2, v4 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[8:9] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 0x12d8fb, v11 +; GISEL-NEXT: v_sub_i32_e64 v14, s[6:7], v0, v4 +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v14, v4 +; GISEL-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, v13, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v12, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] @@ -1526,48 +1528,49 @@ ; CGP-NEXT: v_mul_lo_u32 v9, v9, s4 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v13 -; CGP-NEXT: v_subb_u32_e64 v9, s[6:7], v3, v7, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v7 -; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] -; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[6:7] -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v7, v5, v7, vcc -; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v4 -; CGP-NEXT: 
v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4 -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v11, v5, v11, vcc -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v10, v4 -; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 +; CGP-NEXT: v_subb_u32_e64 v8, vcc, v1, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v13 +; CGP-NEXT: v_subb_u32_e64 v9, vcc, v3, v7, s[6:7] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[8:9] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[4:5] +; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: s_mov_b64 s[4:5], vcc +; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 0x12d8fb, v10 +; CGP-NEXT: v_sub_i32_e64 v13, s[6:7], v0, v4 +; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v4 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v14, v5, v14, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4 +; CGP-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; CGP-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v12, s[4:5] ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v13, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5] ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 -; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -252,23 +252,46 @@ } define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { -; GCN-LABEL: vector_xnor_i64_one_use: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GCN-NEXT: v_xor_b32_e32 v0, v0, v2 -; GCN-NEXT: v_xor_b32_e32 v1, v1, v3 -; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 -; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: vector_xnor_i64_one_use: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: vector_xnor_i64_one_use: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: vector_xnor_i64_one_use: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX906-LABEL: vector_xnor_i64_one_use: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2 +; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3 +; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: vector_xnor_i64_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 -; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX10-NEXT: v_xor3_b32 v0, v0, v2, -1 +; GFX10-NEXT: v_xor3_b32 v1, v1, v3, -1 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %xor = xor i64 %a, %b @@ -375,19 +398,15 @@ ; GFX906-LABEL: xnor_i64_s_v_one_use: ; GFX906: ; %bb.0: ; %entry ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0 +; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1 ; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_i64_s_v_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX10-NEXT: v_xor3_b32 v0, s0, v0, -1 +; GFX10-NEXT: v_xor3_b32 v1, s1, v1, -1 ; GFX10-NEXT: ; return to shader part epilog entry: %b = shl i64 %b64, 29 @@ -428,19 +447,15 @@ ; GFX906-LABEL: xnor_i64_v_s_one_use: ; GFX906: ; %bb.0: ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0 +; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1 ; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_i64_v_s_one_use: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX10-NEXT: v_xor3_b32 v0, v0, s0, -1 +; GFX10-NEXT: v_xor3_b32 v1, v1, s1, -1 ; GFX10-NEXT: ; return to shader part epilog %b = shl 
i64 %b64, 29 %xor = xor i64 %b, %a diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll --- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll +++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll @@ -253,14 +253,14 @@ define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) { ; GFX7-LABEL: v_s_s_bfi_sha256_ch: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s0 +; GFX7-NEXT: v_bfi_b32 v0, v0, v1, s1 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_s_s_bfi_sha256_ch: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v1 +; GFX8-NEXT: v_mov_b32_e32 v1, s0 +; GFX8-NEXT: v_bfi_b32 v0, v0, v1, s1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_s_s_bfi_sha256_ch: @@ -292,14 +292,14 @@ define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) { ; GFX7-LABEL: s_v_s_bfi_sha256_ch: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s0 +; GFX7-NEXT: v_bfi_b32 v0, v1, v0, s1 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_v_s_bfi_sha256_ch: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v1 +; GFX8-NEXT: v_mov_b32_e32 v1, s0 +; GFX8-NEXT: v_bfi_b32 v0, v1, v0, s1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_v_s_bfi_sha256_ch: @@ -331,14 +331,14 @@ define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) { ; GFX7-LABEL: s_s_v_bfi_sha256_ch: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s0 +; GFX7-NEXT: v_bfi_b32 v0, v1, s1, v0 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_s_v_bfi_sha256_ch: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_bfi_b32 v0, s0, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s0 +; GFX8-NEXT: v_bfi_b32 v0, v1, s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_s_v_bfi_sha256_ch: @@ -626,24 +626,16 @@ ; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1: ; GFX8-GISEL: ; %bb.0: ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 +; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 +; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] %xor.0 = xor <2 x i32> %a, %mask %and = and <2 x i32> %xor.0, %b @@ -677,28 +669,16 @@ ; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0: ; GFX8-GISEL: ; %bb.0: ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_and_b32_e32 v2, v0, v2 -; 
GFX8-GISEL-NEXT: v_and_b32_e32 v3, v1, v3 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v4 -; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v5 -; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4 +; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v6, -1, v0 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v7, -1, v1 -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 -; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v6, v4 -; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v7, v5 -; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4 +; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] %and0 = and i64 %a, %b %not.a = xor i64 %a, -1 @@ -732,26 +712,16 @@ ; ; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0: ; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v0 -; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v1 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 -; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s3, v1 -; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1 +; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v2, s3 ; GFX8-GISEL-NEXT: ; return to shader part epilog ; ; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, -1, v0 -; GFX10-GISEL-NEXT: v_xor_b32_e32 v3, -1, v1 -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 -; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 -; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 -; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, s2 +; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s1, s3 ; GFX10-GISEL-NEXT: ; return to shader part epilog %and0 = and i64 %a, %b %not.a = xor i64 %a, -1 @@ -795,11 +765,9 @@ ; ; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 -; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1] -; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 -; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX10-GISEL-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1] +; GFX10-GISEL-NEXT: v_and_or_b32 v0, s0, v0, s2 +; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, s3 ; GFX10-GISEL-NEXT: ; return to shader part epilog %and0 = and i64 %a, %b %not.a = xor i64 %a, -1 @@ -844,12 +812,10 @@ ; ; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_not_b64 s[4:5], s[0:1] -; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 -; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] +; GFX10-GISEL-NEXT: s_not_b64 s[0:1], s[0:1] +; GFX10-GISEL-NEXT: 
v_and_or_b32 v0, s0, v0, s2
+; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, s3
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
@@ -880,26 +846,14 @@
 ;
 ; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
 ; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_and_b32_e32 v2, v0, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
 ; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, -1, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, -1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v4
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v5
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
@@ -930,26 +884,14 @@
 ;
 ; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
 ; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_and_b32_e32 v4, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v5, s1, v1
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v2
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, s1, v3
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
 ; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, -1, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, -1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v4, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v5, v3
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v2
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s1, v3
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
@@ -992,12 +934,10 @@
 ; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
 ; GFX10-GISEL: ; %bb.0:
 ; GFX10-GISEL-NEXT: s_not_b64 s[2:3], s[0:1]
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_and_or_b32 v0, s0, v0, v2
+; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, v3
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
@@ -1033,24 +973,16 @@
 ; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1:
 ; GFX8-GISEL: ; %bb.0:
 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1:
 ; GFX10-GISEL: ; %bb.0:
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
@@ -1083,22 +1015,16 @@
 ;
 ; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
 ; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, v1, s3
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
 ; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, s2
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, s1, v1, s3
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
@@ -1132,22 +1058,16 @@
 ;
 ; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
 ; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, s0, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s3
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, s1, v1
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
 ; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, s2, s0, v0
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, s3, s1, v1
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
@@ -1229,24 +1149,16 @@
 ; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2:
 ; GFX8-GISEL: ; %bb.0:
 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2:
 ; GFX10-GISEL: ; %bb.0:
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
-; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
@@ -1286,28 +1198,20 @@
 ; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64:
 ; GFX8-GISEL: ; %bb.0: ; %entry
 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_and_b32_e32 v6, v0, v4
-; GFX8-GISEL-NEXT: v_and_b32_e32 v7, v1, v5
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v5
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v3, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v7, v1
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v5, v3
 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64:
 ; GFX10-GISEL: ; %bb.0: ; %entry
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: v_or_b32_e32 v6, v0, v4
-; GFX10-GISEL-NEXT: v_or_b32_e32 v7, v1, v5
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v4
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v5
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v2, v6
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v3, v7
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v5, v3
 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
 entry:
   %and0 = and i64 %x, %z
@@ -1348,26 +1252,20 @@
 ;
 ; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
 ; GFX8-GISEL: ; %bb.0: ; %entry
-; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s3, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s3
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v2, s1
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
 ; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s2, v0
-; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s3, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s3, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v3
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s2, s0
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s3, s1
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
@@ -1415,12 +1313,10 @@
 ;
 ; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64:
 ; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3]
-; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s5, v1
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT: s_and_b64 s[4:5], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, s0, s4
+; GFX10-GISEL-NEXT: v_and_or_b32 v1, v1, s1, s5
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
@@ -1462,26 +1358,20 @@
 ;
 ; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
 ; GFX8-GISEL: ; %bb.0: ; %entry
-; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s3, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s2, v2
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s3, v2
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, v1, s3
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
 ; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s0, v0
-; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e64 v2, s0, s2
+; GFX10-GISEL-NEXT: v_xor_b32_e64 v3, s1, s3
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, s3
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
@@ -1519,26 +1409,18 @@
 ;
 ; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
 ; GFX8-GISEL: ; %bb.0: ; %entry
-; GFX8-GISEL-NEXT: v_and_b32_e32 v4, v0, v2
-; GFX8-GISEL-NEXT: v_and_b32_e32 v5, v1, v3
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
-; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
+; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
 ; GFX8-GISEL-NEXT: ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
 ; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: v_or_b32_e32 v4, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v5, v1, v3
-; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
-; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v4
-; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v5
-; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
+; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
 ; GFX10-GISEL-NEXT: ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
--- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
+++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -139,10 +139,6 @@
 ; GISEL-LABEL: csh_v4i32:
 ; GISEL: ; %bb.0:
 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_and_b32_e32 v4, 31, v4
-; GISEL-NEXT: v_and_b32_e32 v5, 31, v5
-; GISEL-NEXT: v_and_b32_e32 v6, 31, v6
-; GISEL-NEXT: v_and_b32_e32 v7, 31, v7
 ; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0
 ; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1
 ; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1033,8 +1033,8 @@
 ; GFX9-GISEL-NEXT: global_load_ubyte v4, v1, s[2:3] offset:3
 ; GFX9-GISEL-NEXT: global_load_ubyte v5, v1, s[2:3] offset:4
 ; GFX9-GISEL-NEXT: global_load_ubyte v6, v1, s[2:3] offset:5
-; GFX9-GISEL-NEXT: global_load_ubyte v7, v1, s[2:3] offset:7
-; GFX9-GISEL-NEXT: global_load_ubyte v8, v1, s[2:3] offset:6
+; GFX9-GISEL-NEXT: global_load_ubyte v7, v1, s[2:3] offset:6
+; GFX9-GISEL-NEXT: global_load_ubyte v8, v1, s[2:3] offset:7
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(6)
 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 8, v0
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
@@ -1045,10 +1045,10 @@
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v4, v6, 8, v5
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v5, 24, v7
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v7
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX9-GISEL-NEXT: v_or3_b32 v3, v5, v6, v4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v8, 24, v5
+; GFX9-GISEL-NEXT: v_or3_b32 v3, v0, v4, 0
 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v4, v3
 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v0, v2
 ; GFX9-GISEL-NEXT: v_add_u32_e32 v4, 32, v4