Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1709,6 +1709,9 @@ if (!I->isCopy() && !I->isRegSequence()) return false; Register DstReg = I->getOperand(0).getReg(); + // Physical registers may have more than one defining instruction, so the + if (DstReg.isPhysical()) + return false; if (TRI->isAGPR(*MRI, DstReg)) continue; MoveRegs.push_back(DstReg); Index: llvm/test/CodeGen/AMDGPU/swdev373493.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/swdev373493.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck %s + +@a0000000000000000000000000000000004040 = external protected addrspace(4) externally_initialized global [4096 x i64], align 16 + +define hidden fastcc void @func1(i32 %cond, ptr %a015194, ptr %a015195, ptr %a14919, ptr %a14642, ptr %a11358, ptr %a10670) unnamed_addr #0 align 2 { +; CHECK-LABEL: func1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v15, v12 +; CHECK-NEXT: v_mov_b32_e32 v14, v11 +; CHECK-NEXT: v_mov_b32_e32 v13, v10 +; CHECK-NEXT: v_mov_b32_e32 v12, v9 +; CHECK-NEXT: v_mov_b32_e32 v11, v8 +; CHECK-NEXT: v_mov_b32_e32 v10, v7 +; CHECK-NEXT: v_mov_b32_e32 v9, v6 +; CHECK-NEXT: v_mov_b32_e32 v8, v5 +; CHECK-NEXT: v_mov_b32_e32 v7, v4 +; CHECK-NEXT: v_mov_b32_e32 v6, v3 +; CHECK-NEXT: s_cmp_lt_i32 s4, 3 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %LeafBlock +; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 +; CHECK-NEXT: ; %bb.2: ; %sw.bb2 +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, a0000000000000000000000000000000004040@rel32@lo+1948 +; 
CHECK-NEXT: s_addc_u32 s17, s17, a0000000000000000000000000000000004040@rel32@hi+1956 +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, s16 +; CHECK-NEXT: v_mov_b32_e32 v1, s17 +; CHECK-NEXT: s_getpc_b64 s[18:19] +; CHECK-NEXT: s_add_u32 s18, s18, func2@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s19, s19, func2@rel32@hi+12 +; CHECK-NEXT: s_setpc_b64 s[18:19] +; CHECK-NEXT: .LBB0_3: ; %LeafBlock1 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_5 +; CHECK-NEXT: ; %bb.4: ; %sw.bb8 +; CHECK-NEXT: v_mov_b32_e32 v0, v1 +; CHECK-NEXT: v_mov_b32_e32 v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, v6 +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v8 +; CHECK-NEXT: v_mov_b32_e32 v5, v9 +; CHECK-NEXT: v_mov_b32_e32 v6, v10 +; CHECK-NEXT: v_mov_b32_e32 v7, v11 +; CHECK-NEXT: v_mov_b32_e32 v8, v12 +; CHECK-NEXT: v_mov_b32_e32 v9, v13 +; CHECK-NEXT: v_mov_b32_e32 v10, v14 +; CHECK-NEXT: v_mov_b32_e32 v11, v15 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, func3@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, func3@rel32@hi+12 +; CHECK-NEXT: s_setpc_b64 s[16:17] +; CHECK-NEXT: .LBB0_5: ; %sw.epilog +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + switch i32 undef, label %sw.epilog [ + i32 3, label %sw.bb8 + i32 1, label %sw.bb2 + ] + +sw.bb2: ; preds = %entry + %a015194.val = load ptr, ptr undef, align 8 + tail call fastcc void @func2(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @a0000000000000000000000000000000004040, i64 0, i64 243) to ptr), ptr %a015194.val, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358) #2 + br label %sw.epilog + +sw.bb8: ; preds = %entry + tail call fastcc void @func3(ptr noundef nonnull align 8 dereferenceable(24) %a015194, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358, ptr noundef nonnull align 8 
dereferenceable(8) %a10670) #2 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb8, %sw.bb2, %entry + ret void +} + +declare dso_local fastcc void @func2(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2 + +declare dso_local fastcc void @func3(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2 Index: llvm/test/CodeGen/AMDGPU/swdev373493.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/swdev373493.mir @@ -0,0 +1,440 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -verify-machineinstrs | FileCheck %s +--- | + @a0000000000000000000000000000000004040 = external protected addrspace(4) externally_initialized global [4096 x i64], align 16 + + define hidden fastcc void @func1(i32 %cond, ptr %a015194, ptr %a015195, ptr %a14919, ptr %a14642, ptr %a11358, ptr %a10670) unnamed_addr #0 align 2 { + entry: + br label %NodeBlock, !amdgpu.uniform !0 + + NodeBlock: ; preds = %entry + %Pivot = icmp sge i32 %cond, 3 + %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot) + %1 = extractvalue { i1, i64 } %0, 0 + %2 = extractvalue { i1, i64 } %0, 1 + br i1 %1, label %LeafBlock1, label %Flow4 + + LeafBlock1: ; preds = %NodeBlock + %SwitchLeaf2 = icmp eq i32 %cond, 3 + %3 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %SwitchLeaf2) + %4 = extractvalue { i1, i64 } %3, 0 + %5 = extractvalue { i1, i64 } %3, 1 + br i1 %4, label %sw.bb8, label %Flow + + Flow4: ; preds = %Flow, %NodeBlock + %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) + %7 = extractvalue { i1, i64 } %6, 0 + %8 = extractvalue { i1, i64 } %6, 1 + br i1 %7, label %LeafBlock, label %UnifiedReturnBlock + + LeafBlock: ; preds = %Flow4 + %SwitchLeaf = icmp eq i32 %cond, 1 + %9 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %SwitchLeaf) + %10 = extractvalue { i1, i64 } %9, 0 + %11 = extractvalue { i1, i64 } %9, 1 + br i1 %10, label 
%sw.bb2, label %Flow3 + + sw.bb2: ; preds = %LeafBlock + %a015194.val = load ptr, ptr undef, align 8 + tail call fastcc void @func2(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @a0000000000000000000000000000000004040, i64 0, i64 243) to ptr), ptr %a015194.val, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358) + br label %Flow3, !amdgpu.uniform !0 + + sw.bb8: ; preds = %LeafBlock1 + tail call fastcc void @func3(ptr noundef nonnull align 8 dereferenceable(24) %a015194, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358, ptr noundef nonnull align 8 dereferenceable(8) %a10670) + br label %Flow, !amdgpu.uniform !0 + + Flow: ; preds = %sw.bb8, %LeafBlock1 + call void @llvm.amdgcn.end.cf.i64(i64 %5) + br label %Flow4, !amdgpu.uniform !0 + + Flow3: ; preds = %sw.bb2, %LeafBlock + call void @llvm.amdgcn.end.cf.i64(i64 %11) + br label %UnifiedReturnBlock, !amdgpu.uniform !0 + + UnifiedReturnBlock: ; preds = %Flow4, %Flow3 + call void @llvm.amdgcn.end.cf.i64(i64 %8) + ret void + } + + declare dso_local fastcc void @func2(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #0 align 2 + + declare dso_local fastcc void @func3(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #0 align 2 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #1 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #1 + + ; Function Attrs: convergent nounwind readnone willreturn + declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #2 + + ; Function Attrs: convergent nounwind willreturn + declare i1 @llvm.amdgcn.loop.i64(i64) #1 + + ; Function Attrs: convergent nounwind willreturn + declare void @llvm.amdgcn.end.cf.i64(i64) #1 + + attributes #0 = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } + attributes 
#1 = { convergent nounwind willreturn } + attributes #2 = { convergent nounwind readnone willreturn } + + !0 = !{} + +... +--- +name: func1 +machineFunctionInfo: + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +body: | + ; CHECK-LABEL: name: func1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr12 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; 
CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY15]], %subreg.sub0, [[COPY14]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY16]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY19]], %subreg.sub0, [[COPY18]], %subreg.sub1 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE2]] + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE4]] + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.NodeBlock: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 [[COPY20]], 2, implicit $exec + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.LeafBlock1: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY20]], 3, implicit $exec + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.Flow4: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.9(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE [[SI_IF]], %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.LeafBlock: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY20]], 1, implicit $exec + ; CHECK-NEXT: [[SI_IF2:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_1]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.sw.bb2: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:vreg_64_align2 = COPY [[DEF]] + ; CHECK-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 killed [[COPY27]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @a0000000000000000000000000000000004040 + 1948, target-flags(amdgpu-rel32-hi) @a0000000000000000000000000000000004040 + 1956, implicit-def dead $scc + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[SI_PC_ADD_REL_OFFSET]].sub0 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY 
[[COPY25]].sub0 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY25]].sub1 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY24]].sub0 + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY24]].sub1 + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY23]].sub0 + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY23]].sub1 + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub1 + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET1:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func2 + 4, target-flags(amdgpu-rel32-hi) @func2 + 12, implicit-def dead $scc + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:sreg_32 = COPY [[SI_PC_ADD_REL_OFFSET]].sub1 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]] + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]] + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY5]] + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY4]] + ; CHECK-NEXT: $sgpr12 = COPY [[COPY3]] + ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]] + ; CHECK-NEXT: $sgpr14 = COPY [[COPY1]] + ; CHECK-NEXT: $vgpr31 = COPY [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY29]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY39]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY38]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY28]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr4 = COPY [[DEF1]] + ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: $vgpr6 = COPY [[COPY30]] + ; CHECK-NEXT: $vgpr7 = COPY [[COPY31]] + ; CHECK-NEXT: $vgpr8 = COPY [[COPY32]] + ; CHECK-NEXT: $vgpr9 = COPY [[COPY33]] + ; CHECK-NEXT: $vgpr10 = COPY [[COPY34]] + ; CHECK-NEXT: $vgpr11 = COPY [[COPY35]] + ; CHECK-NEXT: $vgpr12 = COPY [[COPY36]] + ; CHECK-NEXT: $vgpr13 = COPY [[COPY37]] + ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET1]], @func2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, 
implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.sw.bb8: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:vgpr_32 = COPY [[COPY26]].sub0 + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[COPY26]].sub1 + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:vgpr_32 = COPY [[COPY25]].sub0 + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY25]].sub1 + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[COPY24]].sub0 + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[COPY24]].sub1 + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY23]].sub0 + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[COPY23]].sub1 + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub1 + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:vgpr_32 = COPY [[COPY21]].sub0 + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY21]].sub1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET2:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func3 + 4, target-flags(amdgpu-rel32-hi) @func3 + 12, implicit-def dead $scc + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]] + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]] + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY5]] + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY4]] + ; CHECK-NEXT: $sgpr12 = COPY [[COPY3]] + ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]] + ; CHECK-NEXT: $sgpr14 
= COPY [[COPY1]] + ; CHECK-NEXT: $vgpr31 = COPY [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[COPY40]] + ; CHECK-NEXT: $vgpr1 = COPY [[COPY41]] + ; CHECK-NEXT: $vgpr2 = COPY [[COPY42]] + ; CHECK-NEXT: $vgpr3 = COPY [[COPY43]] + ; CHECK-NEXT: $vgpr4 = COPY [[COPY44]] + ; CHECK-NEXT: $vgpr5 = COPY [[COPY45]] + ; CHECK-NEXT: $vgpr6 = COPY [[COPY46]] + ; CHECK-NEXT: $vgpr7 = COPY [[COPY47]] + ; CHECK-NEXT: $vgpr8 = COPY [[COPY48]] + ; CHECK-NEXT: $vgpr9 = COPY [[COPY49]] + ; CHECK-NEXT: $vgpr10 = COPY [[COPY50]] + ; CHECK-NEXT: $vgpr11 = COPY [[COPY51]] + ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET2]], @func3, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.Flow: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.Flow3: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_END_CF [[SI_IF2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9.UnifiedReturnBlock: + ; CHECK-NEXT: SI_END_CF [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: SI_RETURN + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $vgpr31 + + %40:vgpr_32 = COPY $vgpr31 + %23:sgpr_32 = COPY $sgpr14 + %22:sgpr_32 = COPY $sgpr13 + %21:sgpr_32 = COPY $sgpr12 + %20:sgpr_64 = COPY $sgpr10_sgpr11 + %19:sgpr_64 = COPY $sgpr8_sgpr9 + %18:sgpr_64 = COPY $sgpr6_sgpr7 + %17:sgpr_64 = COPY $sgpr4_sgpr5 + %16:vgpr_32 = COPY $vgpr12 + %15:vgpr_32 = COPY $vgpr11 + %14:vgpr_32 = COPY $vgpr10 + %13:vgpr_32 = COPY $vgpr9 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %77:vreg_64_align2 = REG_SEQUENCE %15, %subreg.sub0, %16, %subreg.sub1 + %78:vreg_64_align2 = REG_SEQUENCE %13, %subreg.sub0, %14, %subreg.sub1 + %79:vreg_64_align2 = REG_SEQUENCE %11, %subreg.sub0, %12, %subreg.sub1 + %80:vreg_64_align2 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1 + %81:vreg_64_align2 = REG_SEQUENCE %7, %subreg.sub0, %8, %subreg.sub1 + %82:vreg_64_align2 = REG_SEQUENCE %5, %subreg.sub0, %6, %subreg.sub1 + %29:vreg_64_align2 = COPY %77 + %28:vreg_64_align2 = COPY %78 + %27:vreg_64_align2 = COPY %79 + %26:vreg_64_align2 = COPY %80 + %25:vreg_64_align2 = COPY %81 + %24:vreg_64_align2 = COPY %82 + + bb.1.NodeBlock: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %36:sreg_32 = S_MOV_B32 2 + %37:sreg_64 = V_CMP_GT_I32_e64 %4, killed %36, implicit $exec + %0:sreg_64 = SI_IF killed %37, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2.LeafBlock1: + successors: %bb.6(0x40000000), %bb.7(0x40000000) + + %38:sreg_32 = S_MOV_B32 3 + %39:sreg_64 = V_CMP_EQ_U32_e64 %4, killed %38, implicit $exec + %1:sreg_64 = SI_IF killed %39, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.6 + + bb.3.Flow4: + successors: %bb.4(0x40000000), %bb.9(0x40000000) + + %2:sreg_64 = SI_ELSE 
%0, %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.4.LeafBlock: + successors: %bb.5(0x40000000), %bb.8(0x40000000) + + %55:sreg_32 = S_MOV_B32 1 + %56:sreg_64 = V_CMP_EQ_U32_e64 %4, killed %55, implicit $exec + %3:sreg_64 = SI_IF killed %56, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.5 + + bb.5.sw.bb2: + successors: %bb.8(0x80000000) + + %58:sreg_64 = IMPLICIT_DEF + %59:vreg_64_align2 = COPY %58 + %57:vreg_64_align2 = FLAT_LOAD_DWORDX2 killed %59, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`) + %60:vgpr_32 = COPY %57.sub1 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + %61:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @a0000000000000000000000000000000004040 + 1948, target-flags(amdgpu-rel32-hi) @a0000000000000000000000000000000004040 + 1956, implicit-def dead $scc + %62:sreg_32 = COPY %61.sub0 + %63:vgpr_32 = COPY %25.sub0 + %64:vgpr_32 = COPY %25.sub1 + %65:vgpr_32 = COPY %26.sub0 + %66:vgpr_32 = COPY %26.sub1 + %67:vgpr_32 = COPY %27.sub0 + %68:vgpr_32 = COPY %27.sub1 + %69:vgpr_32 = COPY %28.sub0 + %70:vgpr_32 = COPY %28.sub1 + %71:vgpr_32 = COPY %57.sub0 + %72:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func2 + 4, target-flags(amdgpu-rel32-hi) @func2 + 12, implicit-def dead $scc + %74:sreg_32 = COPY %61.sub1 + $sgpr4_sgpr5 = COPY %17 + $sgpr6_sgpr7 = COPY %18 + $sgpr8_sgpr9 = COPY %19 + $sgpr10_sgpr11 = COPY %20 + $sgpr12 = COPY %21 + $sgpr13 = COPY %22 + $sgpr14 = COPY %23 + $vgpr31 = COPY %40 + $vgpr0 = COPY %62 + $vgpr1 = COPY %74 + $vgpr2 = COPY %71 + $vgpr3 = COPY %60 + %76:sreg_32 = IMPLICIT_DEF + $vgpr4 = COPY %76 + $vgpr5 = V_MOV_B32_e32 0, implicit $exec + $vgpr6 = COPY %63 + $vgpr7 = COPY %64 + $vgpr8 = COPY %65 + $vgpr9 = COPY %66 + $vgpr10 = COPY %67 + $vgpr11 = COPY %68 + $vgpr12 = COPY %69 + $vgpr13 = COPY %70 + $sgpr30_sgpr31 = SI_CALL killed %72, @func2, 
csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_BRANCH %bb.8 + + bb.6.sw.bb8: + successors: %bb.7(0x80000000) + + %41:vgpr_32 = COPY %24.sub0 + %42:vgpr_32 = COPY %24.sub1 + %43:vgpr_32 = COPY %25.sub0 + %44:vgpr_32 = COPY %25.sub1 + %45:vgpr_32 = COPY %26.sub0 + %46:vgpr_32 = COPY %26.sub1 + %47:vgpr_32 = COPY %27.sub0 + %48:vgpr_32 = COPY %27.sub1 + %49:vgpr_32 = COPY %28.sub0 + %50:vgpr_32 = COPY %28.sub1 + %51:vgpr_32 = COPY %29.sub0 + %52:vgpr_32 = COPY %29.sub1 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + %53:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func3 + 4, target-flags(amdgpu-rel32-hi) @func3 + 12, implicit-def dead $scc + $sgpr4_sgpr5 = COPY %17 + $sgpr6_sgpr7 = COPY %18 + $sgpr8_sgpr9 = COPY %19 + $sgpr10_sgpr11 = COPY %20 + $sgpr12 = COPY %21 + $sgpr13 = COPY %22 + $sgpr14 = COPY %23 + $vgpr31 = COPY %40 + $vgpr0 = COPY %41 + $vgpr1 = COPY %42 + $vgpr2 = COPY %43 + $vgpr3 = COPY %44 + $vgpr4 = COPY %45 + $vgpr5 = COPY %46 + $vgpr6 = COPY %47 + $vgpr7 = COPY %48 + $vgpr8 = COPY %49 + $vgpr9 = COPY %50 + $vgpr10 = COPY %51 + $vgpr11 = COPY %52 + $sgpr30_sgpr31 = SI_CALL killed %53, @func3, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit 
$vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + + bb.7.Flow: + successors: %bb.3(0x80000000) + + SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.8.Flow3: + successors: %bb.9(0x80000000) + + SI_END_CF %3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + + bb.9.UnifiedReturnBlock: + SI_END_CF %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + SI_RETURN + +...