Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -477,39 +477,25 @@ MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsPt = MBB.begin(); + MachineBasicBlock::iterator InsPt = MI; - // If we have instructions that aren't prolog instructions, split the block - // and emit a terminator instruction. This ensures correct spill placement. - // FIXME: We should unconditionally split the block here. - bool NeedBlockSplit = false; Register DataReg = MI.getOperand(0).getReg(); - for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator(); - I != E; ++I) { - if (I->modifiesRegister(DataReg, TRI)) { - NeedBlockSplit = true; - break; - } - } - unsigned Opcode = OrOpc; - MachineBasicBlock *SplitBB = &MBB; - if (NeedBlockSplit) { - SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS); - if (MDT && SplitBB != &MBB) { - MachineDomTreeNode *MBBNode = (*MDT)[&MBB]; - SmallVector Children(MBBNode->begin(), - MBBNode->end()); - MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB); - for (MachineDomTreeNode *Child : Children) - MDT->changeImmediateDominator(Child, SplitBBNode); - } - Opcode = OrTermrOpc; - InsPt = MI; + // Unconditionally split the block at MI and emit a terminator + // instruction. This ensures correct spill placement relative to exec + // writes. 
+ MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns=*/true, LIS); + if (MDT && SplitBB != &MBB) { + MachineDomTreeNode *MBBNode = (*MDT)[&MBB]; + SmallVector Children(MBBNode->begin(), + MBBNode->end()); + MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB); + for (MachineDomTreeNode *Child : Children) + MDT->changeImmediateDominator(Child, SplitBBNode); } MachineInstr *NewMI = - BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec) + BuildMI(MBB, InsPt, DL, TII->get(OrTermrOpc), Exec) .addReg(Exec) .add(MI.getOperand(0)); if (LV) { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll @@ -885,21 +885,21 @@ ; SI-NEXT: s_mov_b64 s[0:1], exec ; SI-NEXT: s_wqm_b64 exec, exec ; SI-NEXT: v_cvt_i32_f32_e32 v0, v0 -; SI-NEXT: s_mov_b32 s4, 0 +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc -; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] +; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3] ; SI-NEXT: s_cbranch_execz .LBB7_3 ; SI-NEXT: ; %bb.1: ; %.demote0 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec ; SI-NEXT: s_cbranch_scc0 .LBB7_9 ; SI-NEXT: ; %bb.2: ; %.demote0 -; SI-NEXT: s_wqm_b64 s[6:7], s[0:1] -; SI-NEXT: s_and_b64 exec, exec, s[6:7] +; SI-NEXT: s_wqm_b64 s[2:3], s[0:1] +; SI-NEXT: s_and_b64 exec, exec, s[2:3] ; SI-NEXT: .LBB7_3: ; %.continue0.preheader -; SI-NEXT: s_or_b64 exec, exec, s[2:3] ; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_or_b64 exec, exec, s[4:5] +; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: s_branch .LBB7_5 ; SI-NEXT: .LBB7_4: ; %.continue1 ; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1 @@ -951,21 +951,21 @@ ; GFX9-NEXT: s_mov_b64 s[0:1], exec ; GFX9-NEXT: s_wqm_b64 exec, exec ; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0 -; GFX9-NEXT: s_mov_b32 s4, 0 +; 
GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3] +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[2:3] ; GFX9-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-NEXT: ; %bb.1: ; %.demote0 ; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec ; GFX9-NEXT: s_cbranch_scc0 .LBB7_9 ; GFX9-NEXT: ; %bb.2: ; %.demote0 -; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_and_b64 exec, exec, s[6:7] +; GFX9-NEXT: s_wqm_b64 s[2:3], s[0:1] +; GFX9-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader -; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_mov_b64 s[2:3], 0 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: s_branch .LBB7_5 ; GFX9-NEXT: .LBB7_4: ; %.continue1 ; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1 @@ -1080,21 +1080,21 @@ ; GFX10-64-NEXT: s_mov_b64 s[0:1], exec ; GFX10-64-NEXT: s_wqm_b64 exec, exec ; GFX10-64-NEXT: v_cvt_i32_f32_e32 v0, v0 -; GFX10-64-NEXT: s_mov_b32 s4, 0 +; GFX10-64-NEXT: s_mov_b32 s6, 0 ; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] +; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[2:3] ; GFX10-64-NEXT: s_cbranch_execz .LBB7_3 ; GFX10-64-NEXT: ; %bb.1: ; %.demote0 ; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec ; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9 ; GFX10-64-NEXT: ; %bb.2: ; %.demote0 -; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1] -; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7] +; GFX10-64-NEXT: s_wqm_b64 s[2:3], s[0:1] +; GFX10-64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader -; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX10-64-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-64-NEXT: s_mov_b64 s[2:3], 0 +; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX10-64-NEXT: v_mov_b32_e32 v0, s6 ; GFX10-64-NEXT: s_branch .LBB7_5 ; GFX10-64-NEXT: 
.LBB7_4: ; %.continue1 ; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1 Index: llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir +++ llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %15, 0, implicit $exec - ; CHECK-NEXT: %7:vgpr_32, dead %8:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %7, %subreg.sub1 + ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed [[V_ADDC_U32_e64_]], %subreg.sub1 ; CHECK-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8), addrspace 1) @@ -55,7 +55,7 @@ ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec - ; CHECK-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit 
$exec :: (load (s8), addrspace 1) + ; CHECK-NEXT: dead [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: @@ -75,7 +75,11 @@ ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed [[S_AND_B32_1]], implicit-def $scc + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec_lo = S_OR_B32_term $exec_lo, killed [[S_AND_B32_1]], implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: ; CHECK-NEXT: S_ENDPGM 0 bb.0: successors: %bb.2(0x40000000), %bb.5(0x40000000) Index: llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -32,25 +32,25 @@ ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1.bb103: - ; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr58_sgpr59:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc ; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF - ; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF + ; GFX90A-NEXT: $vgpr12 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr26 = IMPLICIT_DEF ; GFX90A-NEXT: 
$vgpr20 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF - ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.58, implicit $vcc + ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $agpr0, $vgpr26, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr20, $vgpr22 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr12, $vgpr26, $vgpr20, $vgpr22 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF @@ -58,12 +58,12 @@ ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3.Flow17: - ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr58_sgpr59:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, 
$vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.58(0x40000000) + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr58_sgpr59:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr4 = V_AND_B32_e32 1023, $vgpr31, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc - ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc + ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.58, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.bb15: ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000) @@ -112,12 +112,12 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6.Flow20: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, 
$sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, 
$vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr21 = COPY renamable $sgpr17, implicit $exec ; GFX90A-NEXT: renamable $vgpr20 = COPY $sgpr17, implicit $exec @@ -130,7 +130,7 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.7.Flow19: ; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, 
$sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 ; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec @@ -399,14 +399,14 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.36.Flow21: ; GFX90A-NEXT: successors: %bb.6(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, 
$sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, 
$vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.6 @@ -436,14 +436,14 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.38.Flow22: ; GFX90A-NEXT: successors: %bb.36(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -486,14 +486,14 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit 
$exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.40.Flow23: ; GFX90A-NEXT: successors: %bb.38(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, 
$vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -538,14 +538,14 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr42_sgpr43 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.46, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.42.Flow24: ; GFX90A-NEXT: successors: %bb.40(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, 
$vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr59 = COPY killed 
renamable $vgpr20, implicit $exec @@ -573,13 +573,13 @@ ; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc - ; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF + ; GFX90A-NEXT: $vgpr12 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr14 = IMPLICIT_DEF ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.44: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $agpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr14 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr12, $vgpr14 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF @@ 
-593,13 +593,13 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.45.Flow26: ; GFX90A-NEXT: successors: %bb.47(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, 
$sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc @@ -641,14 +641,14 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.47.Flow25: ; GFX90A-NEXT: successors: %bb.42(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, 
$sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -679,12 +679,12 @@ ; GFX90A-NEXT: S_BRANCH %bb.44 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.50.bb68: - ; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000) + ; GFX90A-NEXT: successors: %bb.55(0x40000000), %bb.51(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr58_sgpr59:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr54_sgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr48_sgpr49, implicit-def dead $scc - ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.54, implicit $vcc + ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.55, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.51: ; GFX90A-NEXT: successors: %bb.45(0x80000000) @@ -702,22 +702,22 @@ ; GFX90A-NEXT: renamable $vgpr18 = 
IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: S_BRANCH %bb.45 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.52.bb80: - ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000) + ; GFX90A-NEXT: successors: %bb.60(0x40000000), %bb.53(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr58_sgpr59:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc ; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr9, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr1, killed $vcc, 0, implicit $exec - ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.59, implicit killed $scc + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.60, implicit killed $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.53: - ; 
GFX90A-NEXT: successors: %bb.61(0x80000000) + ; GFX90A-NEXT: successors: %bb.54(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 @@ -731,12 +731,23 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF - ; GFX90A-NEXT: S_BRANCH %bb.61 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.54.bb73: - ; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000) + ; GFX90A-NEXT: bb.54.Flow30: + ; GFX90A-NEXT: successors: %bb.56(0x80000000) + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, 
$sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_OR_B64 killed renamable $sgpr50_sgpr51, killed renamable $sgpr58_sgpr59, implicit-def dead $scc + ; GFX90A-NEXT: S_BRANCH %bb.56 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.55.bb73: + ; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.56(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, 
$sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr58_sgpr59:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr5 = GLOBAL_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec :: (load (s8) from %ir.i74, addrspace 1) @@ -755,20 +766,20 @@ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr54 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $agpr1 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr60_sgpr61 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.52, implicit $exec ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.55.Flow29: + ; GFX90A-NEXT: bb.56.Flow29: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, 
$sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr60_sgpr61, implicit-def $scc ; 
GFX90A-NEXT: S_BRANCH %bb.45 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.56.bb90: - ; GFX90A-NEXT: successors: %bb.60(0x80000000) + ; GFX90A-NEXT: bb.57.bb90: + ; GFX90A-NEXT: successors: %bb.61(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr54 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec @@ -785,11 +796,11 @@ ; GFX90A-NEXT: renamable $vgpr17 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: S_BRANCH %bb.60 + ; GFX90A-NEXT: S_BRANCH %bb.61 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.57: + ; GFX90A-NEXT: bb.58: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $exec:0x000000000000000F, $sgpr14, $sgpr15, $sgpr16, $sgpr17:0x0000000000000003, $sgpr23:0x0000000000000003, 
$vgpr31, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $exec:0x000000000000000F, $sgpr14, $sgpr15, $sgpr16, $sgpr17:0x0000000000000003, $sgpr23:0x0000000000000003, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr12_vgpr13:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr19 = COPY killed renamable $sgpr17, implicit $exec @@ -824,7 +835,7 @@ ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 ; GFX90A-NEXT: S_BRANCH %bb.7 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.58.bb105: + ; GFX90A-NEXT: bb.59.bb105: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58_sgpr59:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, 
$vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} @@ -835,7 +846,7 @@ ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec ; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec - ; GFX90A-NEXT: renamable $agpr0_agpr1 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.435, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.435, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec ; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3) ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1 @@ -843,8 +854,8 @@ ; GFX90A-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.59.bb85: - ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000) + ; GFX90A-NEXT: bb.60.bb85: + ; GFX90A-NEXT: successors: %bb.57(0x40000000), %bb.61(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr20, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr58_sgpr59:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, 
$vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 1, $vgpr8, implicit $exec @@ -863,32 +874,20 @@ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr52_sgpr53 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.56, implicit $exec + ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.60.Flow31: - ; GFX90A-NEXT: successors: %bb.61(0x80000000) + ; GFX90A-NEXT: bb.61.Flow31: + ; GFX90A-NEXT: successors: %bb.54(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, 
$vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr52_sgpr53, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $vgpr16, implicit $exec - ; GFX90A-NEXT: renamable $agpr0_agpr1 = COPY killed renamable $vgpr12_vgpr13, implicit $exec - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.61.Flow30: - ; GFX90A-NEXT: successors: %bb.55(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr17, $vgpr19, $vgpr20, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable 
$sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_OR_B64 killed renamable $sgpr50_sgpr51, killed renamable $sgpr58_sgpr59, implicit-def dead $scc - ; GFX90A-NEXT: S_BRANCH %bb.55 + ; GFX90A-NEXT: S_BRANCH %bb.54 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.62.bb140: ; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, 
$sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -896,14 +895,14 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.63.Flow13: ; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, 
$sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.64.bb159: ; GFX90A-NEXT: 
successors: %bb.67(0x40000000), %bb.65(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, 
$vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr4, implicit $exec ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -926,12 +925,11 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.67.bb161: ; GFX90A-NEXT: successors: %bb.65(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, 
$vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr23, killed $vgpr25, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr27, implicit $exec - ; GFX90A-NEXT: renamable $vgpr3 = COPY killed renamable $agpr1, implicit $exec - ; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr3, killed $vgpr21, implicit $exec + ; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr13, killed $vgpr21, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr3, killed $vgpr2, implicit $exec ; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_sdwa 0, killed $vgpr54, 0, $vgpr3, 0, 0, 6, implicit $exec @@ -946,15 +944,14 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.68.bb174: ; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000F, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr28 = V_OR_B32_e32 1, $vgpr26, implicit $exec ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr28, $vgpr24, implicit $exec ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr38, $vgpr22, implicit $exec ; GFX90A-NEXT: renamable $vgpr32 = V_CNDMASK_B32_e64 0, $vgpr36, 0, 0, $sgpr12_sgpr13, implicit $exec ; GFX90A-NEXT: renamable $vgpr50 = V_OR_B32_e32 $vgpr32, $vgpr20, implicit $exec - ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = COPY renamable $agpr0_agpr1, implicit $exec - ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr50, killed $vgpr12, implicit $exec + ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr50, $vgpr12, implicit $exec ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr48, $vgpr14, implicit $exec ; GFX90A-NEXT: renamable $vgpr52 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr34, killed $sgpr12_sgpr13, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 -1 @@ -963,14 +960,14 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.69.Flow: ; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, 
$vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.70.bb186: ; GFX90A-NEXT: successors: %bb.71(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec ; GFX90A-NEXT: renamable $vgpr5 = COPY renamable $sgpr27, implicit $exec @@ -999,19 +996,21 @@ ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.71.Flow9: ; GFX90A-NEXT: successors: %bb.63(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; 
GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0 ; GFX90A-NEXT: S_BRANCH %bb.63 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.72.bb196: ; GFX90A-NEXT: successors: %bb.69(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $agpr0_agpr1:0x000000000000000C, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr54, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, 
$sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x0000000000000003, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr5 = V_OR_B32_e32 $vgpr52, killed $vgpr18, implicit $exec + ; GFX90A-NEXT: renamable $vgpr29 = COPY killed renamable $vgpr13, implicit $exec ; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 killed $vgpr5, killed $vgpr16, implicit $exec ; GFX90A-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr13, renamable $vgpr12_vgpr13, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr13 = COPY killed renamable $vgpr29, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 0 ; GFX90A-NEXT: S_BRANCH %bb.69 bb: Index: 
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -61,7 +61,7 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3 ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execz .LBB0_4 +; GCN-O0-NEXT: s_cbranch_execz .LBB0_5 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0 @@ -112,10 +112,12 @@ ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: .LBB0_4: ; %bb.outer.end +; GCN-O0-NEXT: ; %bb.4: ; %Flow +; GCN-O0-NEXT: .LBB0_5: ; %bb.outer.end ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.6: ; %bb.outer.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 @@ -232,7 +234,7 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 5 ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execz .LBB1_4 +; GCN-O0-NEXT: s_cbranch_execz .LBB1_5 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload @@ -253,18 +255,20 @@ ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] ; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 -; GCN-O0-NEXT: s_branch .LBB1_4 +; GCN-O0-NEXT: s_branch .LBB1_5 ; GCN-O0-NEXT: .LBB1_3: ; %Flow ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: s_branch .LBB1_5 -; GCN-O0-NEXT: 
.LBB1_4: ; %bb.inner.end +; GCN-O0-NEXT: ; %bb.4: ; %Flow +; GCN-O0-NEXT: s_branch .LBB1_7 +; GCN-O0-NEXT: .LBB1_5: ; %bb.inner.end +; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4 +; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.6: ; %bb.inner.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_readlane_b32 s2, v1, 4 -; GCN-O0-NEXT: v_readlane_b32 s3, v1, 5 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3] ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 @@ -282,7 +286,7 @@ ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] ; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 ; GCN-O0-NEXT: s_branch .LBB1_3 -; GCN-O0-NEXT: .LBB1_5: ; %bb.outer.end +; GCN-O0-NEXT: .LBB1_7: ; %bb.outer.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 @@ -403,7 +407,7 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3 ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execz .LBB2_6 +; GCN-O0-NEXT: s_cbranch_execz .LBB2_7 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 2 @@ -471,10 +475,12 @@ ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 6 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: .LBB2_6: ; %bb.outer.end +; GCN-O0-NEXT: ; %bb.6: ; %Flow1 +; GCN-O0-NEXT: .LBB2_7: ; %bb.outer.end ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.8: ; %bb.outer.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 @@ -629,7 +635,7 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2 ; GCN-O0-NEXT: 
v_writelane_b32 v1, s1, 3 ; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execz .LBB3_8 +; GCN-O0-NEXT: s_cbranch_execz .LBB3_10 ; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload @@ -653,7 +659,7 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 5 ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 +; GCN-O0-NEXT: s_cbranch_execz .LBB3_8 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload @@ -669,7 +675,7 @@ ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 offset:8 -; GCN-O0-NEXT: s_branch .LBB3_7 +; GCN-O0-NEXT: s_branch .LBB3_8 ; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload @@ -712,15 +718,18 @@ ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 6 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.7: ; %Flow ; GCN-O0-NEXT: s_branch .LBB3_1 -; GCN-O0-NEXT: .LBB3_7: ; %Flow1 +; GCN-O0-NEXT: .LBB3_8: ; %Flow1 ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: .LBB3_8: ; %bb.outer.end +; GCN-O0-NEXT: ; %bb.9: ; %Flow1 +; GCN-O0-NEXT: .LBB3_10: ; %bb.outer.end ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.11: ; %bb.outer.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 @@ -826,6 +835,7 @@ ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2 ; 
GCN-O0-NEXT: v_readlane_b32 s1, v1, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-O0-NEXT: ; %bb.3: ; %bb.end ; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-O0-NEXT: s_barrier ; GCN-O0-NEXT: s_endpgm @@ -954,10 +964,12 @@ ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 ; GCN-O0-NEXT: ; %bb.2: ; %bb2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 6 ; GCN-O0-NEXT: v_readlane_b32 s5, v1, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: ; %bb.3: ; %bb2 +; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s6, 0 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s6 @@ -983,8 +995,8 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s5, 11 ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execz .LBB5_5 -; GCN-O0-NEXT: ; %bb.3: ; %bb4 +; GCN-O0-NEXT: s_cbranch_execz .LBB5_6 +; GCN-O0-NEXT: ; %bb.4: ; %bb4 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 ; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 @@ -1011,8 +1023,8 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s5, 13 ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execz .LBB5_6 -; GCN-O0-NEXT: ; %bb.4: ; %bb8 +; GCN-O0-NEXT: s_cbranch_execz .LBB5_8 +; GCN-O0-NEXT: ; %bb.5: ; %bb8 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_mov_b32 s10, 0 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 @@ -1035,42 +1047,46 @@ ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_branch .LBB5_6 -; GCN-O0-NEXT: .LBB5_5: ; %Flow2 +; 
GCN-O0-NEXT: s_branch .LBB5_8 +; GCN-O0-NEXT: .LBB5_6: ; %Flow2 +; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-O0-NEXT: v_readlane_b32 s4, v1, 10 +; GCN-O0-NEXT: v_readlane_b32 s5, v1, 11 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: ; %bb.7: ; %Flow2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_readlane_b32 s4, v1, 10 -; GCN-O0-NEXT: v_readlane_b32 s5, v1, 11 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_branch .LBB5_7 -; GCN-O0-NEXT: .LBB5_6: ; %Flow +; GCN-O0-NEXT: s_branch .LBB5_10 +; GCN-O0-NEXT: .LBB5_8: ; %Flow +; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-O0-NEXT: v_readlane_b32 s4, v1, 12 +; GCN-O0-NEXT: v_readlane_b32 s5, v1, 13 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: ; %bb.9: ; %Flow ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 
4-byte Folded Reload -; GCN-O0-NEXT: v_readlane_b32 s4, v1, 12 -; GCN-O0-NEXT: v_readlane_b32 s5, v1, 13 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_branch .LBB5_5 -; GCN-O0-NEXT: .LBB5_7: ; %bb10 +; GCN-O0-NEXT: s_branch .LBB5_6 +; GCN-O0-NEXT: .LBB5_10: ; %bb10 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: v_readlane_b32 s6, v1, 8 ; GCN-O0-NEXT: v_readlane_b32 s7, v1, 9 @@ -1082,23 +1098,25 @@ ; GCN-O0-NEXT: v_writelane_b32 v1, s5, 17 ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execz .LBB5_9 -; GCN-O0-NEXT: ; %bb.8: ; %Flow1 +; GCN-O0-NEXT: s_cbranch_execz .LBB5_12 +; GCN-O0-NEXT: ; %bb.11: ; %Flow1 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_mov_b64 s[4:5], 0 ; GCN-O0-NEXT: s_xor_b64 s[4:5], exec, -1 ; GCN-O0-NEXT: v_writelane_b32 v1, s4, 14 ; GCN-O0-NEXT: v_writelane_b32 v1, s5, 15 -; GCN-O0-NEXT: .LBB5_9: ; %Flow3 +; GCN-O0-NEXT: .LBB5_12: ; %Flow3 +; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-O0-NEXT: v_readlane_b32 s4, v1, 16 +; GCN-O0-NEXT: v_readlane_b32 s5, v1, 17 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: ; %bb.13: ; %Flow3 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v5, off, 
s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_readlane_b32 s8, v1, 16 -; GCN-O0-NEXT: v_readlane_b32 s9, v1, 17 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-O0-NEXT: v_readlane_b32 s6, v1, 4 ; GCN-O0-NEXT: v_readlane_b32 s7, v1, 5 ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 14 @@ -1122,11 +1140,11 @@ ; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 -; GCN-O0-NEXT: ; %bb.10: ; %bb12 +; GCN-O0-NEXT: ; %bb.14: ; %bb12 ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 18 ; GCN-O0-NEXT: v_readlane_b32 s5, v1, 19 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: ; %bb.11: ; %bb12 +; GCN-O0-NEXT: ; %bb.15: ; %bb12 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -18,20 +18,29 @@ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} + ; GCN-NEXT: DBG_VALUE ; 
GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} ; GCN-NEXT: DBG_VALUE + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -89,7 +98,11 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -146,7 +159,11 @@ ; GCN-NEXT: DBG_VALUE ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -208,7 +225,11 @@ ; GCN-NEXT: KILL [[DEF]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -253,18 +274,18 @@ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc 
; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.3: + ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF @@ -273,7 +294,11 @@ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[S_BREV_B32_]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -329,13 +354,21 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc ; GCN-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -389,13 +422,21 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; 
GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -436,25 +477,34 @@ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.7(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: + ; GCN-NEXT: S_ENDPGM 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_BRANCH %bb.4 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -499,9 +549,13 @@ ; GCN-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 1, [[COPY]], implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: 
%bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[V_CMP_LT_U32_e64_]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[V_CMP_LT_U32_e64_]], implicit-def $scc ; GCN-NEXT: S_BRANCH %bb.1 bb.0: successors: %bb.1 @@ -564,10 +618,14 @@ ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY1]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[S_AND_B64_1]], implicit-def $scc + ; GCN-NEXT: successors: %bb.7(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 @@ -629,12 +687,12 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, %2, implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, %2, implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) @@ -707,7 +765,7 @@ ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: S_ENDPGM 0 @@ -756,14 +814,18 @@ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; GCN-NEXT: successors: %bb.2(0x40000000), 
%bb.7(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.7(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_BRANCH %bb.5 @@ -774,7 +836,7 @@ ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.4(0x80000000) @@ -832,14 +894,14 @@ ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.14(0x40000000) + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.16(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -854,25 +916,39 @@ ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: - ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.15(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: 
[[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_3]] - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.15, implicit $exec ; GCN-NEXT: S_BRANCH %bb.4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.15(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.15 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.15: ; GCN-NEXT: successors: %bb.7(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_BRANCH %bb.7 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.16: + ; GCN-NEXT: successors: %bb.14(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.14 + ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.7: + ; GCN-NEXT: successors: %bb.17(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.17: ; GCN-NEXT: successors: %bb.8(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: S_BRANCH %bb.8 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.8: @@ -893,9 +969,9 @@ ; GCN-NEXT: S_BRANCH %bb.11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.10: - ; GCN-NEXT: successors: %bb.14(0x80000000) + ; GCN-NEXT: successors: %bb.18(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_BRANCH %bb.14 + ; GCN-NEXT: S_BRANCH %bb.18 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.11: ; GCN-NEXT: successors: %bb.12(0x80000000) @@ -903,16 +979,25 @@ ; GCN-NEXT: S_BRANCH %bb.12 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.12: - ; GCN-NEXT: successors: %bb.10(0x40000000), %bb.14(0x40000000) + ; GCN-NEXT: successors: %bb.10(0x40000000), %bb.18(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec ; GCN-NEXT: [[S_AND_B64_5:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_B64_5]], implicit-def $scc - ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.18, implicit $exec ; GCN-NEXT: S_BRANCH %bb.10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.18: + ; GCN-NEXT: successors: %bb.16(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.16 + ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.14: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: successors: %bb.19(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.19: ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.14 Index: llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -48,9 +48,6 @@ ; VMEM: [[ENDIF]]: -; Restore val -; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload - ; Reload and restore exec mask ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -61,6 +58,9 @@ ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; Restore val +; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload + ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) #0 { @@ -121,7 +121,6 @@ ; GCN: buffer_store_dword v[[VAL_LOOP_RELOAD]], off, 
s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: [[END]]: -; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -131,6 +130,7 @@ ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 { @@ -230,7 +230,6 @@ ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: -; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] @@ -241,6 +240,7 @@ ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 { Index: llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -10,136 +10,136 @@ ; GFX900: ; %bb.0: ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-NEXT: s_mov_b64 s[2:3], 0 -; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; 
GFX900-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: v_mov_b32_e32 v1, s4 +; GFX900-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-NEXT: .LBB0_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX900-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol -; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX900-NEXT: s_cbranch_execnz .LBB0_1 ; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX900-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX900-NEXT: global_store_dword v[0:1], v1, off +; GFX900-NEXT: global_store_dword v[0:1], v0, off ; GFX900-NEXT: s_endpgm ; ; GFX908-LABEL: global_atomic_fadd_ret_f32: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: s_mov_b64 s[2:3], 0 -; GFX908-NEXT: v_mov_b32_e32 v0, 0 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: v_mov_b32_e32 v1, s4 +; GFX908-NEXT: v_mov_b32_e32 v0, s4 ; GFX908-NEXT: .LBB0_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX908-NEXT: v_mov_b32_e32 v2, v1 -; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX908-NEXT: v_mov_b32_e32 v3, v0 +; GFX908-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX908-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; 
GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol -; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX908-NEXT: s_cbranch_execnz .LBB0_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX908-NEXT: global_store_dword v[0:1], v1, off +; GFX908-NEXT: global_store_dword v[0:1], v0, off ; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: global_atomic_fadd_ret_f32: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: s_mov_b64 s[2:3], 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v1, s4 +; GFX90A-NEXT: v_mov_b32_e32 v0, s4 ; GFX90A-NEXT: .LBB0_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 ; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] glc +; GFX90A-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX90A-NEXT: s_cbranch_execnz .LBB0_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX90A-NEXT: global_store_dword v[0:1], v1, off +; GFX90A-NEXT: global_store_dword v[0:1], v0, off ; GFX90A-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_fadd_ret_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; 
GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: .LBB0_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v2, v1 -; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GFX10-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX10-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: global_store_dword v[0:1], v1, off +; GFX10-NEXT: global_store_dword v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_fadd_ret_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: s_mov_b32 s2, 0 ; GFX11-NEXT: .LBB0_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_mov_b32_e32 v2, v1 -; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX11-NEXT: v_mov_b32_e32 v3, v0 +; GFX11-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], 
s[0:1] glc +; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX11-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_execnz .LBB0_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX11-NEXT: global_store_b32 v[0:1], v1, off +; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst @@ -152,52 +152,52 @@ ; GFX900: ; %bb.0: ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-NEXT: s_mov_b64 s[2:3], 0 -; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: v_mov_b32_e32 v1, s4 +; GFX900-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX900-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol -; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX900-NEXT: s_cbranch_execnz .LBB1_1 ; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX900-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX900-NEXT: global_store_dword v[0:1], v1, off +; GFX900-NEXT: global_store_dword v[0:1], v0, 
off ; GFX900-NEXT: s_endpgm ; ; GFX908-LABEL: global_atomic_fadd_ret_f32_ieee: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: s_mov_b64 s[2:3], 0 -; GFX908-NEXT: v_mov_b32_e32 v0, 0 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: v_mov_b32_e32 v1, s4 +; GFX908-NEXT: v_mov_b32_e32 v0, s4 ; GFX908-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX908-NEXT: v_mov_b32_e32 v2, v1 -; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX908-NEXT: v_mov_b32_e32 v3, v0 +; GFX908-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX908-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol -; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX908-NEXT: s_cbranch_execnz .LBB1_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX908-NEXT: global_store_dword v[0:1], v1, off +; GFX908-NEXT: global_store_dword v[0:1], v0, off ; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: global_atomic_fadd_ret_f32_ieee: @@ -215,29 +215,29 @@ ; GFX10-LABEL: global_atomic_fadd_ret_f32_ieee: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v2, v1 -; GFX10-NEXT: 
v_add_f32_e32 v1, 4.0, v2 +; GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GFX10-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX10-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execnz .LBB1_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: global_store_dword v[0:1], v1, off +; GFX10-NEXT: global_store_dword v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_fadd_ret_f32_ieee: @@ -439,52 +439,52 @@ ; GFX900: ; %bb.0: ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-NEXT: s_mov_b64 s[2:3], 0 -; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: v_mov_b32_e32 v1, s4 +; GFX900-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-NEXT: .LBB4_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX900-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol -; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX900-NEXT: s_cbranch_execnz .LBB4_1 ; GFX900-NEXT: 
; %bb.2: ; %atomicrmw.end ; GFX900-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX900-NEXT: global_store_dword v[0:1], v1, off +; GFX900-NEXT: global_store_dword v[0:1], v0, off ; GFX900-NEXT: s_endpgm ; ; GFX908-LABEL: global_atomic_fadd_ret_f32_agent: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: s_mov_b64 s[2:3], 0 -; GFX908-NEXT: v_mov_b32_e32 v0, 0 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: v_mov_b32_e32 v1, s4 +; GFX908-NEXT: v_mov_b32_e32 v0, s4 ; GFX908-NEXT: .LBB4_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX908-NEXT: v_mov_b32_e32 v2, v1 -; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX908-NEXT: v_mov_b32_e32 v3, v0 +; GFX908-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX908-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol -; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX908-NEXT: s_cbranch_execnz .LBB4_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX908-NEXT: global_store_dword v[0:1], v1, off +; GFX908-NEXT: global_store_dword v[0:1], v0, off ; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: global_atomic_fadd_ret_f32_agent: @@ -502,29 +502,29 @@ ; GFX10-LABEL: global_atomic_fadd_ret_f32_agent: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: 
s_mov_b32 s2, 0 ; GFX10-NEXT: .LBB4_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v2, v1 -; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GFX10-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX10-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execnz .LBB4_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: global_store_dword v[0:1], v1, off +; GFX10-NEXT: global_store_dword v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_fadd_ret_f32_agent: @@ -550,136 +550,136 @@ ; GFX900: ; %bb.0: ; GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX900-NEXT: s_mov_b64 s[2:3], 0 -; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: v_mov_b32_e32 v1, s4 +; GFX900-NEXT: v_mov_b32_e32 v0, s4 ; GFX900-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX900-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol -; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, 
v0, v3 ; GFX900-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX900-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX900-NEXT: s_cbranch_execnz .LBB5_1 ; GFX900-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX900-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX900-NEXT: global_store_dword v[0:1], v1, off +; GFX900-NEXT: global_store_dword v[0:1], v0, off ; GFX900-NEXT: s_endpgm ; ; GFX908-LABEL: global_atomic_fadd_ret_f32_system: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: s_mov_b64 s[2:3], 0 -; GFX908-NEXT: v_mov_b32_e32 v0, 0 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: v_mov_b32_e32 v1, s4 +; GFX908-NEXT: v_mov_b32_e32 v0, s4 ; GFX908-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX908-NEXT: v_mov_b32_e32 v2, v1 -; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX908-NEXT: v_mov_b32_e32 v3, v0 +; GFX908-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX908-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol -; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX908-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX908-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX908-NEXT: s_cbranch_execnz .LBB5_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX908-NEXT: global_store_dword v[0:1], v1, off +; GFX908-NEXT: global_store_dword v[0:1], v0, off ; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: global_atomic_fadd_ret_f32_system: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: s_mov_b64 s[2:3], 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x0 ; 
GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v1, s4 +; GFX90A-NEXT: v_mov_b32_e32 v0, s4 ; GFX90A-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 ; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] glc +; GFX90A-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX90A-NEXT: s_cbranch_execnz .LBB5_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX90A-NEXT: global_store_dword v[0:1], v1, off +; GFX90A-NEXT: global_store_dword v[0:1], v0, off ; GFX90A-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_fadd_ret_f32_system: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v2, v1 -; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GFX10-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX10-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv -; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execnz .LBB5_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-NEXT: global_store_dword v[0:1], v1, off +; GFX10-NEXT: global_store_dword v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_fadd_ret_f32_system: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: s_mov_b32 s2, 0 ; GFX11-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_mov_b32_e32 v2, v1 -; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX11-NEXT: v_mov_b32_e32 v3, v0 +; GFX11-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] glc +; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v3 ; GFX11-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_execnz .LBB5_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX11-NEXT: global_store_b32 v[0:1], v1, off +; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("one-as") seq_cst @@ -692,52 +692,52 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x24 ; GCN-NEXT: s_mov_b64 s[2:3], 0 -; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dword s4, s[0:1], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: .LBB6_1: ; %atomicrmw.start ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_mov_b32_e32 v2, v1 -; GCN-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GCN-NEXT: v_mov_b32_e32 v3, v0 +; GCN-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GCN-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GCN-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN-NEXT: s_or_b64 exec, exec, s[2:3] -; GCN-NEXT: global_store_dword v[0:1], v1, off +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_fadd_ret_f32_wrong_subtarget: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: s_mov_b64 s[2:3], 0 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v1, s4 +; GFX11-NEXT: v_mov_b32_e32 v0, s4 ; GFX11-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_mov_b32_e32 v2, v1 -; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 +; GFX11-NEXT: v_mov_b32_e32 v3, v0 +; GFX11-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX11-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX11-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-NEXT: buffer_wbinvl1_vol -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 ; GFX11-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX11-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX11-NEXT: s_cbranch_execnz .LBB6_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX11-NEXT: global_store_dword v[0:1], v1, off +; GFX11-NEXT: global_store_dword v[0:1], v0, off ; GFX11-NEXT: s_endpgm %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst store float %result, ptr addrspace(1) undef Index: llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -1152,13 +1152,13 @@ ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; W64-O0-NEXT: v_readlane_b32 s4, v8, 10 ; W64-O0-NEXT: v_readlane_b32 s5, v8, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; W64-O0-NEXT: ; %bb.9: ; %bb2 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) Index: llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -1222,13 +1222,13 @@ ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: v_readlane_b32 s4, v8, 10 ; W64-O0-NEXT: v_readlane_b32 s5, v8, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; W64-O0-NEXT: ; %bb.9: ; %bb2 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) Index: llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -98,22 +98,16 @@ ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v2 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v0 -; GLOBALNESS1-NEXT: s_branch .LBB1_4 -; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 -; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS1-NEXT: s_mov_b64 
s[6:7], 0 -; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_branch .LBB1_3 +; GLOBALNESS1-NEXT: .LBB1_1: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow28 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30 -; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 +; GLOBALNESS1-NEXT: .LBB1_3: ; %bb5 ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_15 Depth 2 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[74:75], s[74:75] op_sel:[0,1] @@ -135,27 +129,31 @@ ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_8 -; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; %bb.4: ; %NodeBlock +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_cmp_lt_i32 s79, 1 -; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7 -; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock12 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_6 +; GLOBALNESS1-NEXT: ; %bb.5: ; %LeafBlock12 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 1 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GLOBALNESS1-NEXT: s_cbranch_execnz .LBB1_8 -; GLOBALNESS1-NEXT: s_branch .LBB1_23 -; GLOBALNESS1-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_7 +; GLOBALNESS1-NEXT: s_branch .LBB1_8 +; GLOBALNESS1-NEXT: .LBB1_6: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 ; GLOBALNESS1-NEXT: 
; implicit-def: $sgpr4_sgpr5 -; GLOBALNESS1-NEXT: s_branch .LBB1_23 +; GLOBALNESS1-NEXT: .LBB1_7: ; %LeafBlock +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: .LBB1_8: ; %Flow25 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24 -; GLOBALNESS1-NEXT: .LBB1_9: ; %baz.exit.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_1 +; GLOBALNESS1-NEXT: ; %bb.9: ; %baz.exit.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -163,19 +161,19 @@ ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000 ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[80:81], s[62:63] -; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_24 ; GLOBALNESS1-NEXT: ; %bb.10: ; %bb33.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_12 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb39.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[40:41], off ; GLOBALNESS1-NEXT: .LBB1_12: ; %bb44.lr.ph.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 ; GLOBALNESS1-NEXT: 
v_cndmask_b32_e32 v2, 0, v43, vcc ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) @@ -188,9 +186,9 @@ ; GLOBALNESS1-NEXT: .LBB1_14: ; %bb63.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[52:53] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_23 ; GLOBALNESS1-NEXT: .LBB1_15: ; %bb44.i -; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; Parent Loop BB1_3 Depth=1 ; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[48:49] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14 @@ -244,40 +242,37 @@ ; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[40:41], off ; GLOBALNESS1-NEXT: s_branch .LBB1_13 -; GLOBALNESS1-NEXT: .LBB1_23: ; %LeafBlock -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9 -; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS1-NEXT: s_branch .LBB1_3 -; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: .LBB1_23: ; %Flow23 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24 -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow24 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[80:81] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] -; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 -; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: 
s_cbranch_execz .LBB1_29 +; GLOBALNESS1-NEXT: ; %bb.25: ; %bb67.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_27 +; GLOBALNESS1-NEXT: ; %bb.26: ; %bb69.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[40:41], off -; GLOBALNESS1-NEXT: s_branch .LBB1_1 -; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i -; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: .LBB1_27: ; %bb70.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29 +; GLOBALNESS1-NEXT: ; %bb.28: ; %bb73.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[40:41], off +; GLOBALNESS1-NEXT: .LBB1_29: ; %Flow15 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: s_branch .LBB1_2 ; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] @@ -385,22 +380,16 @@ ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v2 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v0 -; GLOBALNESS0-NEXT: s_branch .LBB1_4 -; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 -; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15 -; 
GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 -; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28 -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_branch .LBB1_3 +; GLOBALNESS0-NEXT: .LBB1_1: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow28 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30 -; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 +; GLOBALNESS0-NEXT: .LBB1_3: ; %bb5 ; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_15 Depth 2 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[76:77], s[76:77] op_sel:[0,1] @@ -422,27 +411,31 @@ ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_8 -; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; %bb.4: ; %NodeBlock +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_cmp_lt_i32 s75, 1 -; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7 -; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock12 -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_6 +; GLOBALNESS0-NEXT: ; %bb.5: ; %LeafBlock12 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 1 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GLOBALNESS0-NEXT: s_cbranch_execnz .LBB1_8 -; GLOBALNESS0-NEXT: s_branch .LBB1_23 -; GLOBALNESS0-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_7 +; GLOBALNESS0-NEXT: s_branch 
.LBB1_8 +; GLOBALNESS0-NEXT: .LBB1_6: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GLOBALNESS0-NEXT: s_branch .LBB1_23 +; GLOBALNESS0-NEXT: .LBB1_7: ; %LeafBlock +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: .LBB1_8: ; %Flow25 -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24 -; GLOBALNESS0-NEXT: .LBB1_9: ; %baz.exit.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_1 +; GLOBALNESS0-NEXT: ; %bb.9: ; %baz.exit.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -450,19 +443,19 @@ ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000 ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[80:81], s[62:63] -; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_24 ; GLOBALNESS0-NEXT: ; %bb.10: ; %bb33.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_12 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb39.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[40:41], off ; GLOBALNESS0-NEXT: .LBB1_12: ; %bb44.lr.ph.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 
Depth=1 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 ; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v43, vcc ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) @@ -475,9 +468,9 @@ ; GLOBALNESS0-NEXT: .LBB1_14: ; %bb63.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[52:53] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_23 ; GLOBALNESS0-NEXT: .LBB1_15: ; %bb44.i -; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; Parent Loop BB1_3 Depth=1 ; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[48:49] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14 @@ -531,40 +524,37 @@ ; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[40:41], off ; GLOBALNESS0-NEXT: s_branch .LBB1_13 -; GLOBALNESS0-NEXT: .LBB1_23: ; %LeafBlock -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9 -; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS0-NEXT: s_branch .LBB1_3 -; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23 -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: .LBB1_23: ; %Flow23 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24 -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow24 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[80:81] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_cbranch_execz 
.LBB1_2 -; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_29 +; GLOBALNESS0-NEXT: ; %bb.25: ; %bb67.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_27 +; GLOBALNESS0-NEXT: ; %bb.26: ; %bb69.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[40:41], off -; GLOBALNESS0-NEXT: s_branch .LBB1_1 -; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i -; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: .LBB1_27: ; %bb70.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29 +; GLOBALNESS0-NEXT: ; %bb.28: ; %bb73.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v40 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[40:41], off +; GLOBALNESS0-NEXT: .LBB1_29: ; %Flow15 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_3 Depth=1 +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: s_branch .LBB1_2 ; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] Index: llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll +++ llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll @@ -32,6 +32,7 @@ ; GCN-NEXT: v_div_fixup_f32 v0, v1, s2, v0 ; GCN-NEXT: .LBB0_2: ; %end ; GCN-NEXT: 
s_or_b32 exec_lo, exec_lo, s1 +; GCN-NEXT: ; %bb.3: ; %end ; GCN-NEXT: v_add_f32_e64 v0, v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: Index: llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll +++ llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll @@ -32,16 +32,17 @@ ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %bb193 ; CHECK-NEXT: .LBB0_2: ; %bb194 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: v_readlane_b32 s4, v1, 0 ; CHECK-NEXT: v_readlane_b32 s5, v1, 1 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] +; CHECK-NEXT: ; %bb.3: ; %bb194 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], v0, s4 ; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5] -; CHECK-NEXT: s_cbranch_vccnz .LBB0_4 -; CHECK-NEXT: ; %bb.3: ; %bb201 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_5 +; CHECK-NEXT: ; %bb.4: ; %bb201 ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, V2@rel32@lo+4 @@ -53,7 +54,7 @@ ; CHECK-NEXT: s_barrier ; CHECK-NEXT: s_trap 2 ; CHECK-NEXT: ; divergent unreachable -; CHECK-NEXT: .LBB0_4: ; %UnifiedReturnBlock +; CHECK-NEXT: .LBB0_5: ; %UnifiedReturnBlock ; CHECK-NEXT: s_endpgm bb: %i10 = tail call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/test/CodeGen/AMDGPU/wave32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wave32.ll +++ llvm/test/CodeGen/AMDGPU/wave32.ll @@ -356,36 +356,36 @@ ; GFX1032-LABEL: test_loop_with_if: ; GFX1032: ; %bb.0: ; %bb ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: 
s_mov_b32 s2, 0 -; GFX1032-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GFX1032-NEXT: s_branch .LBB10_2 ; GFX1032-NEXT: .LBB10_1: ; %bb13 ; GFX1032-NEXT: ; in Loop: Header=BB10_2 Depth=1 ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfe, v4 -; GFX1032-NEXT: v_add_nc_u32_e32 v1, 1, v4 +; GFX1032-NEXT: v_add_nc_u32_e32 v3, 1, v4 ; GFX1032-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_cbranch_execz .LBB10_8 ; GFX1032-NEXT: .LBB10_2: ; %bb2 ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX1032-NEXT: v_cmp_ge_i32_e64 s4, v1, v0 -; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v1, v0 +; GFX1032-NEXT: v_cmp_ge_i32_e64 s4, v3, v0 +; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v3, v0 ; GFX1032-NEXT: s_mov_b32 s3, 0 ; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_4 ; GFX1032-NEXT: ; %bb.3: ; %bb5 ; GFX1032-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; GFX1032-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GFX1032-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX1032-NEXT: s_andn2_b32 s4, s4, exec_lo ; GFX1032-NEXT: s_mov_b32 s3, exec_lo -; GFX1032-NEXT: v_lshlrev_b64 v[2:3], 2, v[1:2] +; GFX1032-NEXT: v_lshlrev_b64 v[1:2], 2, v[3:4] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_add_co_u32 v2, vcc_lo, s0, v2 -; GFX1032-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo -; GFX1032-NEXT: global_load_dword v4, v[2:3], off +; GFX1032-NEXT: v_add_co_u32 v1, vcc_lo, s0, v1 +; GFX1032-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, s1, v2, vcc_lo +; GFX1032-NEXT: global_load_dword v4, v[1:2], off ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 11, v4 ; GFX1032-NEXT: s_and_b32 s6, vcc_lo, exec_lo @@ -398,9 +398,9 @@ ; GFX1032-NEXT: s_xor_b32 s4, exec_lo, s5 ; GFX1032-NEXT: ; %bb.5: ; %bb11 ; GFX1032-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; GFX1032-NEXT: v_lshrrev_b32_e32 
v4, 31, v1 +; GFX1032-NEXT: v_lshrrev_b32_e32 v4, 31, v3 ; GFX1032-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX1032-NEXT: v_add_nc_u32_e32 v4, v1, v4 +; GFX1032-NEXT: v_add_nc_u32_e32 v4, v3, v4 ; GFX1032-NEXT: v_ashrrev_i32_e32 v4, 1, v4 ; GFX1032-NEXT: ; %bb.6: ; %Flow1 ; GFX1032-NEXT: ; in Loop: Header=BB10_2 Depth=1 @@ -409,8 +409,8 @@ ; GFX1032-NEXT: s_cbranch_execz .LBB10_1 ; GFX1032-NEXT: ; %bb.7: ; %bb10 ; GFX1032-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; GFX1032-NEXT: v_mov_b32_e32 v4, v1 -; GFX1032-NEXT: global_store_dword v[2:3], v0, off +; GFX1032-NEXT: v_mov_b32_e32 v4, v3 +; GFX1032-NEXT: global_store_dword v[1:2], v0, off ; GFX1032-NEXT: s_branch .LBB10_1 ; GFX1032-NEXT: .LBB10_8: ; %bb1 ; GFX1032-NEXT: s_endpgm @@ -418,36 +418,36 @@ ; GFX1064-LABEL: test_loop_with_if: ; GFX1064: ; %bb.0: ; %bb ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_mov_b64 s[2:3], 0 -; GFX1064-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GFX1064-NEXT: s_branch .LBB10_2 ; GFX1064-NEXT: .LBB10_1: ; %bb13 ; GFX1064-NEXT: ; in Loop: Header=BB10_2 Depth=1 ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX1064-NEXT: v_cmp_lt_i32_e32 vcc, 0xfe, v4 -; GFX1064-NEXT: v_add_nc_u32_e32 v1, 1, v4 +; GFX1064-NEXT: v_add_nc_u32_e32 v3, 1, v4 ; GFX1064-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX1064-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_cbranch_execz .LBB10_8 ; GFX1064-NEXT: .LBB10_2: ; %bb2 ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX1064-NEXT: v_cmp_ge_i32_e64 s[6:7], v1, v0 -; GFX1064-NEXT: v_cmp_lt_i32_e32 vcc, v1, v0 +; GFX1064-NEXT: v_cmp_ge_i32_e64 s[6:7], v3, v0 +; GFX1064-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0 ; GFX1064-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB10_4 ; GFX1064-NEXT: ; %bb.3: ; %bb5 ; GFX1064-NEXT: ; in Loop: 
Header=BB10_2 Depth=1 -; GFX1064-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GFX1064-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX1064-NEXT: s_andn2_b64 s[6:7], s[6:7], exec ; GFX1064-NEXT: s_mov_b64 s[4:5], exec -; GFX1064-NEXT: v_lshlrev_b64 v[2:3], 2, v[1:2] +; GFX1064-NEXT: v_lshlrev_b64 v[1:2], 2, v[3:4] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_add_co_u32 v2, vcc, s0, v2 -; GFX1064-NEXT: v_add_co_ci_u32_e32 v3, vcc, s1, v3, vcc -; GFX1064-NEXT: global_load_dword v4, v[2:3], off +; GFX1064-NEXT: v_add_co_u32 v1, vcc, s0, v1 +; GFX1064-NEXT: v_add_co_ci_u32_e32 v2, vcc, s1, v2, vcc +; GFX1064-NEXT: global_load_dword v4, v[1:2], off ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: v_cmp_gt_i32_e32 vcc, 11, v4 ; GFX1064-NEXT: s_and_b64 s[10:11], vcc, exec @@ -460,9 +460,9 @@ ; GFX1064-NEXT: s_xor_b64 s[6:7], exec, s[8:9] ; GFX1064-NEXT: ; %bb.5: ; %bb11 ; GFX1064-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; GFX1064-NEXT: v_lshrrev_b32_e32 v4, 31, v1 +; GFX1064-NEXT: v_lshrrev_b32_e32 v4, 31, v3 ; GFX1064-NEXT: s_andn2_b64 s[4:5], s[4:5], exec -; GFX1064-NEXT: v_add_nc_u32_e32 v4, v1, v4 +; GFX1064-NEXT: v_add_nc_u32_e32 v4, v3, v4 ; GFX1064-NEXT: v_ashrrev_i32_e32 v4, 1, v4 ; GFX1064-NEXT: ; %bb.6: ; %Flow1 ; GFX1064-NEXT: ; in Loop: Header=BB10_2 Depth=1 @@ -471,8 +471,8 @@ ; GFX1064-NEXT: s_cbranch_execz .LBB10_1 ; GFX1064-NEXT: ; %bb.7: ; %bb10 ; GFX1064-NEXT: ; in Loop: Header=BB10_2 Depth=1 -; GFX1064-NEXT: v_mov_b32_e32 v4, v1 -; GFX1064-NEXT: global_store_dword v[2:3], v0, off +; GFX1064-NEXT: v_mov_b32_e32 v4, v3 +; GFX1064-NEXT: global_store_dword v[1:2], v0, off ; GFX1064-NEXT: s_branch .LBB10_1 ; GFX1064-NEXT: .LBB10_8: ; %bb1 ; GFX1064-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -211,12 +211,12 @@ ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge +; GFX9-O0-NEXT: v_readlane_b32 s34, v3, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v3, 5 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] +; GFX9-O0-NEXT: ; %bb.3: ; %merge ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: v_readlane_b32 s36, v3, 4 -; GFX9-O0-NEXT: v_readlane_b32 s37, v3, 5 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[36:37] ; GFX9-O0-NEXT: v_readlane_b32 s38, v3, 0 ; GFX9-O0-NEXT: v_readlane_b32 s39, v3, 1 ; GFX9-O0-NEXT: v_readlane_b32 s34, v3, 2