Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-basic-phi.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-basic-phi.mir @@ -0,0 +1,45 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: FileCheck --check-prefix=RESULT %s < %t + +# CHECK-INTERESTINGNESS: PHI + +# RESULT: bb.0: +# RESULT-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: %generic:_(s32) = G_IMPLICIT_DEF +# RESULT-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: %5:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: S_BRANCH %bb.2 + +# RESULT-NOT: bb.1 + +# RESULT: bb.2: +# RESULT-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %1, %bb.0 +# RESULT-NEXT: S_ENDPGM 0, implicit %0, implicit [[PHI]], implicit %generic(s32) + +# RESULT-NOT: bb. + +--- +name: basic_func_phi +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %generic:_(s32) = G_CONSTANT i32 123 + S_CBRANCH_SCC0 %bb.1, implicit undef $scc + S_BRANCH %bb.2 + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + %4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + S_NOP 0 + + bb.2: + %5:vgpr_32 = PHI %1, %bb.0, %2, %bb.1 + S_ENDPGM 0, implicit %0, implicit %5, implicit %generic +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-bundle-liveness.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-bundle-liveness.mir @@ -0,0 +1,61 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + +# Test that vreg defs on bundles are handled when deleted + +# CHECK-INTERESTINGNESS0: S_NOP 1 +# CHECK-INTERESTINGNESS0: S_ENDPGM + +# FIXME: This leaves the return block as dead code. + + +# RESULT0: bb.0: +# RESULT0-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %7:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %6:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %5:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.5 + +# RESULT0: bb.2: +# RESULT0-NEXT: S_NOP 1, implicit %4 + +# RESULT0: bb.3: +# RESULT0-NEXT: S_ENDPGM 0, implicit %0, implicit %5 + +# RESULT0: bb.5: + + +--- +name: bundle_liveness +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + + bb.1: + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 456, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 99, implicit $exec + %4, %5, %6 = BUNDLE { + %4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec + %6:vgpr_32 = V_MOV_B32_e32 8, implicit $exec + } + %7:vgpr_32 = V_MOV_B32_e32 123, implicit $exec + S_NOP 0, implicit %6 + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + S_BRANCH %bb.3 + + bb.2: + S_NOP 1, implicit %4 + + bb.3: + S_ENDPGM 0, implicit %0, implicit 
%5, implicit %7 + +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-dominator-is-deleted.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-dominator-is-deleted.mir @@ -0,0 +1,61 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + +# Test preserving the live out register values in a block which is +# deleted, when the immediate dominator is also deleted. + +# CHECK-INTERESTINGNESS0: V_MOV_B32_e32 5, +# CHECK-INTERESTINGNESS0: S_ENDPGM + +# RESULT0: bb.0: +# RESULT0-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.4 + +# RESULT0-NOT: bb.2 + +# RESULT0: bb.3: +# RESULT0-NEXT: S_NOP 0, implicit %4 +# RESULT0-NEXT: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.4 + +# RESULT0: bb.4: +# RESULT0-NEXT: %6:vgpr_32 = PHI %1, %bb.0, %5, %bb.3 +# RESULT0-NEXT: S_ENDPGM 0, implicit %6 + +# RESULT0-NOT: bb. 
+ +--- +name: func +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + S_CBRANCH_SCC0 %bb.4, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + S_CBRANCH_SCC1 %bb.4, implicit undef $scc + S_BRANCH %bb.3 + + bb.3: + S_NOP 0, implicit %4 + %6:vgpr_32 = V_MOV_B32_e32 5, implicit $exec + S_BRANCH %bb.4 + + bb.4: + %7:vgpr_32 = PHI %1, %bb.0, %4, %bb.2, %6, %bb.3 + S_ENDPGM 0, implicit %7 +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-entry-fallthrough.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-entry-fallthrough.mir @@ -0,0 +1,56 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + + +# CHECK-INTERESTINGNESS0: S_WAITCNT + +# CHECK-INTERESTINGNESS1: S_NOP 0 + +# RESULT0: bb.0: +# RESULT0-NEXT: S_WAITCNT 0 +# RESULT0-NEXT: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +# RESULT0-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +# RESULT0-NEXT: S_CMP_EQ_I32 0, 0, implicit-def $scc +# RESULT0-NEXT: S_BRANCH %bb.4 + +# RESULT0-NOT: bb. 
+# RESULT0: bb.4: +# RESULT0-NOT: S_BRANCH +# RESULT0-NOT: S_ENDPGM + + +# RESULT1: bb.0: +# RESULT1-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: S_BRANCH %bb.1 + + +# RESULT1: bb.1: +# RESULT1-NEXT: S_NOP 0 +# RESULT1-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT1-NOT: bb +# RESULT1: bb.[[UNDEF_BB]]: + +--- +name: entry_conditional_fallthrough +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + S_CMP_EQ_I32 0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit $scc + + bb.1: + S_NOP 0 + + bb.2: + S_ENDPGM 0, implicit %0 +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-existing-undef-block.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-existing-undef-block.mir @@ -0,0 +1,28 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: FileCheck --check-prefix=RESULT %s < %t + +# The last block is already an empty block usable as a pseudo-undef +# block. Make sure there are no assertions. + +# CHECK-INTERESTINGNESS: V_MOV_B32_e32 + +# RESULT: bb.0: +# RESULT-NEXT: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +# RESULT-NEXT: S_CBRANCH_SCC0 %bb.1, implicit undef $scc +# RESULT-NEXT: S_BRANCH %bb.1 + +# RESULT: bb.1: +# RESULT-NOT: S_BRANCH + +--- +name: entry_block_uses_already_pseudo_undef +tracksRegLiveness: true +body: | + bb.0: + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + S_CBRANCH_SCC0 %bb.1, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-g_br.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-g_br.mir @@ -0,0 +1,58 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# CHECK-INTERESTINGNESS0: S_NOP 0 +# CHECK-INTERESTINGNESS0: S_NOP 1 +# CHECK-INTERESTINGNESS0: S_ENDPGM + +# RESULT0: bb.0: +# RESULT0: %0:_(s32) = COPY $vgpr0 +# RESULT0-NEXT: %1:_(s32) = G_CONSTANT i32 0 +# RESULT0-NEXT: %2:_(s32) = G_ICMP intpred(ugt), %0(s32), %1 +# RESULT0-NEXT: S_NOP 0 +# RESULT0-NEXT: G_BRCOND %2(s32), %bb.[[UNDEF_BB:[0-9]+]] +# RESULT0-NEXT: G_BR %bb.1 + +# RESULT0: bb.1: +# RESULT0-NEXT: S_NOP 1 +# RESULT0-NEXT: G_BRCOND %2(s32), %bb.[[UNDEF_BB]] +# RESULT0-NEXT: G_BR %bb.[[UNDEF_BB]] + +# RESULT0: bb.4: +# RESULT0-NEXT: S_ENDPGM 0 + +# RESULT0: bb.[[UNDEF_BB]]: + +--- +name: g_br +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_ICMP intpred(ugt), %0, %1 + S_NOP 0 + G_BRCOND %2(s32), %bb.3 + G_BR %bb.1 + + bb.1: + S_NOP 1 + G_BRCOND %2(s32), %bb.3 + G_BR %bb.2 + + bb.2: + S_NOP 2 + G_BRCOND %2(s32), %bb.4 + G_BR %bb.3 + + bb.3: + S_NOP 3 + + bb.4: + S_ENDPGM 0 + +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-g_brjt.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-g_brjt.mir @@ -0,0 +1,145 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + +# CHECK-INTERESTINGNESS0: S_NOP 1 + +# CHECK-INTERESTINGNESS1: S_NOP 1 +# CHECK-INTERESTINGNESS1: S_NOP 4 + +# RESULT0: bb.0.entry: +# RESULT0: %0:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %7:_(s64) = G_IMPLICIT_DEF +# RESULT0-NEXT: %2:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %9:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %4:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %6:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %1:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %8:_(s64) = G_IMPLICIT_DEF +# RESULT0-NEXT: %3:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %10:_(s1) = G_IMPLICIT_DEF +# RESULT0-NEXT: %5:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %17:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: G_BRCOND %10(s1), %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT0: bb.1.entry: +# RESULT0-NEXT: S_NOP 1 +# RESULT0-NEXT: %11:_(s32) = G_CONSTANT i32 0 +# RESULT0-NEXT: %12:_(p0) = G_JUMP_TABLE %jump-table.0 +# RESULT0-NEXT: %14:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %13:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %16:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: %15:_(s32) = G_IMPLICIT_DEF +# RESULT0-NEXT: G_BRJT %12(p0), %jump-table.0, %7(s64) + +# RESULT0: bb.[[UNDEF_BB]]: + + 
+# RESULT1: bb.0.entry: +# RESULT1: G_BRCOND %10(s1), %bb.4 + +# RESULT1: bb.1.entry: +# RESULT1: S_NOP 1 +# RESULT1: G_BRJT %12(p0), %jump-table.0, %7(s64) + +# RESULT1: bb.4.return: +# RESULT1-NEXT: %17:_(s32) = G_PHI %4(s32), %bb.0, %11(s32), %bb.1 +# RESULT1-NEXT: S_NOP 4 + + + +--- | + define i32 @jt_test(i32 %x) { + entry: + switch i32 %x, label %return [ + i32 75, label %sw.bb + i32 34, label %sw.bb + i32 56, label %sw.bb + i32 35, label %sw.bb + i32 40, label %sw.bb + i32 4, label %sw.bb1 + i32 5, label %sw.bb1 + i32 6, label %sw.bb1 + ] + + sw.bb: + %add = add nsw i32 %x, 42 + br label %return + + sw.bb1: + %mul = mul nsw i32 %x, 3 + br label %return + + return: + %retval.0 = phi i32 [ %mul, %sw.bb1 ], [ %add, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 + } + +... +--- +name: jt_test +tracksRegLiveness: true +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.3', '%bb.3', '%bb.3', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.2', '%bb.2', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.2', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.2', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', + '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.2' ] +body: | + bb.0.entry: + liveins: $vgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 71 + %2:_(s32) = G_CONSTANT i32 3 + %3:_(s32) = G_CONSTANT i32 42 + %4:_(s32) = G_CONSTANT i32 0 + %5:_(s32) = G_CONSTANT i32 4 + %6:_(s32) = G_SUB %0, %5 + %7:_(s64) = G_ZEXT %6(s32) + %8:_(s64) = G_ZEXT %1(s32) + %9:_(s32) = G_ICMP intpred(ugt), %7(s64), %8 + %10:_(s1) = G_TRUNC %9(s32) + S_NOP 0 + G_BRCOND %10(s1), %bb.4 + + 
bb.1.entry: + successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab) + + S_NOP 1 + %11:_(s32) = G_CONSTANT i32 0 + %12:_(p0) = G_JUMP_TABLE %jump-table.0 + G_BRJT %12(p0), %jump-table.0, %7(s64) + + bb.2.sw.bb: + S_NOP 2 + %13:_(s32) = G_CONSTANT i32 42 + %14:_(s32) = nsw G_ADD %0, %13 + G_BR %bb.4 + + bb.3.sw.bb1: + S_NOP 3 + %15:_(s32) = G_CONSTANT i32 3 + %16:_(s32) = nsw G_MUL %0, %15 + + bb.4.return: + %17:_(s32) = G_PHI %16(s32), %bb.3, %14(s32), %bb.2, %4(s32), %bb.0, %11(s32), %bb.1 + S_NOP 4 + $vgpr0 = COPY %17(s32) + S_ENDPGM 0, implicit $vgpr0 + +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-live-through-idom.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-live-through-idom.mir @@ -0,0 +1,58 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + +# %2 is defined in %bb.0, and redefined in %bb.2. 
We're deleting +# %bb.2, and don't want to insert a redef in %bb.1 since it's already +# live in from %bb.0 + +# CHECK-INTERESTINGNESS0: V_MOV_B32_e32 20, +# CHECK-INTERESTINGNESS0: S_NOP 0 +# CHECK-INTERESTINGNESS0: S_ENDPGM + +# RESULT0: bb.0: +# RESULT0-NEXT: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +# RESULT0-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +# RESULT0-NEXT: %2:vgpr_32 = V_MOV_B32_e32 20, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.1 + +# RESULT0: bb.1: +# RESULT0-NEXT: S_NOP 0 +# RESULT0-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT0-NOT: bb.3: + +# RESULT0: bb.4: +# RESULT0-NEXT: S_ENDPGM 0, implicit %2 + +# RESULT0: bb.[[UNDEF_BB]] +# RESULT0-NOT: S_BRANCH + + +--- +name: func +tracksRegLiveness: true +body: | + bb.0: + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + S_CBRANCH_SCC0 %bb.3, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + S_NOP 0 + S_CBRANCH_SCC1 %bb.3, implicit undef $scc + S_BRANCH %bb.2 + + bb.2: + %2:vgpr_32 = V_MOV_B32_e32 30, implicit $exec + S_NOP 0, implicit %2 + S_BRANCH %bb.3 + + bb.3: + S_NOP 7 + + bb.4: + S_ENDPGM 0, implicit %2 +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-liveout-last-instruction-def.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-liveout-last-instruction-def.mir @@ -0,0 +1,32 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: FileCheck --check-prefix=RESULT %s < %t + +# Check that replacement defs are correctly inserted if a live out def +# is the last instruction in a block. 
+ +# CHECK-INTERESTINGNESS: S_ENDPGM + +# RESULT: bb.0: +# RESULT-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT: bb.2: +# RESULT-NEXT: S_ENDPGM 0, implicit %0, implicit %1 + +# RESULT: [[UNDEF_BB]] + +--- +name: last_inst_liveout_reg +tracksRegLiveness: true +body: | + bb.0: + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.1: + %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + + bb.2: + S_ENDPGM 0, implicit %0, implicit %1 +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-phi-live-throughs.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-phi-live-throughs.mir @@ -0,0 +1,100 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + + +# CHECK-INTERESTINGNESS0: V_MOV_B32_e32 6, +# CHECK-INTERESTINGNESS0: S_ENDPGM + +# CHECK-INTERESTINGNESS1: V_MOV_B32_e32 5, +# CHECK-INTERESTINGNESS1: S_ENDPGM + +# RESULT0: bb.0: +# RESULT0-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %5:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.3 + +# RESULT0-NOT: bb.1 +# RESULT0-NOT: bb.2 + +# RESULT0: bb.3: +# RESULT0-NEXT: %2:vgpr_32 = PHI %0, %bb.0 +# 
RESULT0-NEXT: S_NOP 0, implicit %2 +# RESULT0-NEXT: %6:vgpr_32 = V_MOV_B32_e32 6, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.4 + +# RESULT0: bb.4: +# RESULT0-NEXT: %7:vgpr_32 = PHI %2, %bb.3 +# RESULT0-NEXT: S_ENDPGM 0, implicit %7 + +# RESULT0-NOT: bb. + + + +# RESULT1: bb.0: +# RESULT1-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %6:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT1-NOT: bb.1 + +# RESULT1: bb.2: +# RESULT1-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec +# RESULT1-NEXT: S_BRANCH %bb.4 + +# RESULT1-NOT: bb.3 + +# RESULT1: bb.4: +# RESULT1-NEXT: %7:vgpr_32 = PHI %4, %bb.2 +# RESULT1-NEXT: S_ENDPGM 0, implicit %7 + +# RESULT1: bb.[[UNDEF_BB]] +# RESULT1-NOT: S_BRANCH + + +--- +name: func +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + S_CBRANCH_SCC0 %bb.3, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:vgpr_32 = PHI %2, %bb.1, %2, %bb.3 + %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec + S_CBRANCH_SCC1 %bb.4, implicit undef $scc + S_BRANCH %bb.3 + + bb.3: + %2:vgpr_32 = PHI %0, %bb.0, %4, %bb.2 + S_NOP 0, implicit %2 + %6:vgpr_32 = V_MOV_B32_e32 6, implicit $exec + S_CBRANCH_SCC1 %bb.4, implicit undef $scc + S_BRANCH %bb.2 + + bb.4: + %7:vgpr_32 = PHI %2, %bb.3, %4, %bb.2 + S_ENDPGM 0, implicit %7 +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-phi-loop.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-phi-loop.mir @@ -0,0 +1,88 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS2 --test-arg %s --test-arg --input-file %s -o %t.2 2> %t.2.log +# RUN: FileCheck --check-prefix=RESULT2 %s < %t.2 + + + +# Save the first block +# CHECK-INTERESTINGNESS0: V_MOV_B32_e32 0, + +# Save the second block +# CHECK-INTERESTINGNESS1: PHI + +# Save the third block +# CHECK-INTERESTINGNESS2: S_ENDPGM + + +# RESULT0: bb.0: + +# RESULT0: S_WAITCNT 0 +# RESULT0-NEXT: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +# RESULT0-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +# RESULT0-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] +# RESULT0: bb.[[UNDEF_BB]]: +# RESULT0-NOT: PHI +# RESULT0-NOT: S_ENDPGM + + +# RESULT1: bb.0: +# RESULT1: %0:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %1:vgpr_32 = IMPLICIT_DEF + +# RESULT1: bb.1: +# RESULT1-NEXT: %2:vgpr_32 = PHI %0, %bb.0, %3, %bb.1 +# RESULT1-NEXT: %4:vgpr_32 = V_MOV_B32_e32 
3, implicit $exec +# RESULT1-NEXT: %3:vgpr_32 = V_MOV_B32_e32 4, implicit $exec +# RESULT1-NEXT: S_BRANCH %bb.1 +# RESULT1-NOT: %bb.2 +# RESULT1-NOT: S_ENDPGM + +# RESULT1-NOT: bb. + + +# RESULT2: bb.0: +# RESULT2: %0:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT2-NOT: bb.1 + +# RESULT2: bb.2: +# RESULT2-NEXT: S_ENDPGM 0, implicit %2, implicit %3 + +# RESULT2: bb.[[UNDEF_BB]]: +# RESULT2-NOT: S_ENDPGM + +--- +name: func +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + S_BRANCH %bb.1 + + bb.1: + %2:vgpr_32 = PHI %0, %bb.0, %4, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + %4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + S_CBRANCH_SCC0 %bb.1, implicit undef $scc + + bb.2: + S_ENDPGM 0, implicit %2, implicit %4 +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-redef-reg-in-deleted-block.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-redef-reg-in-deleted-block.mir @@ -0,0 +1,94 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + + +# CHECK-INTERESTINGNESS0: V_MOV_B32_e32 456, +# CHECK-INTERESTINGNESS0: S_ENDPGM + + +# Delete both def blocks +# CHECK-INTERESTINGNESS1: S_ENDPGM + + +# RESULT0: bb.0: + +# RESULT0: bb.1: +# RESULT0-NEXT: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +# RESULT0-NEXT: %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +# RESULT0-NEXT: %2:vgpr_32 = V_MOV_B32_e32 456, implicit $exec +# RESULT0-NEXT: %3:vgpr_32 = V_MOV_B32_e32 99, implicit $exec +# RESULT0-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.3 + +# RESULT0-NOT: bb. 
+ +# RESULT0: bb.3: +# RESULT0: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr6 + +# RESULT0: S_NOP 1, implicit $vgpr6 +# RESULT0-NEXT: %5:vgpr_32 = COPY %2 +# RESULT0-NEXT: %6:vgpr_32 = COPY $vgpr1_vgpr2 +# RESULT0-NEXT: S_ENDPGM 0, implicit %0, implicit %5, implicit %6 + + +# RESULT1: bb.0: +# RESULT1-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %2:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT1: bb.3: +# RESULT1: S_NOP 1, implicit $vgpr6 +# RESULT1-NEXT: %5:vgpr_32 = COPY %2 +# RESULT1-NEXT: %6:vgpr_32 = COPY $vgpr1_vgpr2 +# RESULT1-NEXT: S_ENDPGM 0, implicit %0, implicit %5, implicit %6 + +# RESULT1: bb.[[UNDEF_BB]]: + +--- +name: redefined_vregs +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + + bb.1: + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 456, implicit $exec + %4:vgpr_32 = V_MOV_B32_e32 99, implicit $exec + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + S_BRANCH %bb.3 + + bb.2: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; %3 is redefined in the block and is live out + %3:vgpr_32 = V_ADD_U32_e32 2, %3, implicit $exec + + ; %4 is live in, redefined in the block, and not live out + %4:vgpr_32 = V_ADD_U32_e32 8, %4, implicit $exec + S_NOP 0, implicit %4 + + ; %5 is defined and redefined in the block but is not live out + %5:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + %5:vgpr_32 = V_ADD_U32_e32 2, %5, implicit $exec + + S_NOP 0, implicit $vgpr0, implicit $vgpr1, implicit %5 + $vgpr6 = V_MOV_B32_e32 123, implicit $exec + + bb.3: + liveins: $vgpr0, $vgpr1_vgpr2, $vgpr6 + S_NOP 1, implicit $vgpr6 + %6:vgpr_32 = COPY %3 + %8:vgpr_32 = COPY $vgpr1_vgpr2 + S_ENDPGM 0, implicit %0, implicit %6, implicit %8 + +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-remove-multi-pred-undef-block.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-remove-multi-pred-undef-block.mir @@ -0,0 +1,43 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# CHECK-INTERESTINGNESS0: S_NOP 7 + +# RESULT0: bb.0: +# RESULT0-NEXT: liveins: $sgpr16 + +# RESULT0: %0:sgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: S_BRANCH %bb.2 + +# RESULT0: bb.2: +# RESULT0-NEXT: S_NOP 7 +# RESULT0-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] + +# RESULT0: bb.[[UNDEF_BB]]: +# RESULT0-NOT: S_ENDPGM +# RESULT0-NOT: S_BRANCH +--- +name: multiple_predecessors_for_undef_block +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr16 + + %0:sgpr_32 = COPY $sgpr16 + S_CMP_LT_I32 %0, 1, implicit-def $scc + S_CBRANCH_SCC1 %bb.2, implicit $scc + S_BRANCH %bb.1 + + bb.1: + + bb.2: + S_NOP 7 + + bb.3: + + bb.4: + S_ENDPGM 0 + +... Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-remove-undef-block-predecessor.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-remove-undef-block-predecessor.mir @@ -0,0 +1,46 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + +# When %bb.2 is deleted, make sure we properly remove it as a +# predecessor from the undef block. 
+ +# CHECK-INTERESTINGNESS0: S_NOP 1 + +# RESULT0: bb.0: +# RESULT0: %4:sreg_64 = SI_IF killed %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.3 + +# RESULT0: bb.1: +# RESULT0-NEXT: S_NOP 1 +# RESULT0-NEXT: dead %5:sreg_64 = SI_ELSE killed %4, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.3 + +# RESULT0: bb.3: + +--- +name: remove_undef_block_successor +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_64_xexec = S_MOV_B64 0 + %1:sreg_64_xexec = S_MOV_B64 1 + S_NOP 0 + %2:vgpr_32 = V_MOV_B32_e32 123, implicit $exec + %3:sreg_64 = V_CMP_LT_I32_e64 0, %2, implicit $exec + %4:sreg_64 = SI_IF killed %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.1: + S_NOP 1 + dead %5:sreg_64 = SI_ELSE killed %4, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %6:sreg_64 = V_CMP_EQ_U32_e64 0, killed %2, implicit $exec + dead %7:sreg_64 = SI_IF killed %6, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + +... 
Index: llvm/test/tools/llvm-reduce/mir/reduce-blocks-si-if.mir =================================================================== --- /dev/null +++ llvm/test/tools/llvm-reduce/mir/reduce-blocks-si-if.mir @@ -0,0 +1,103 @@ +# REQUIRES: amdgpu-registered-target + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS0 --test-arg %s --test-arg --input-file %s -o %t.0 2> %t.0.log +# RUN: FileCheck --check-prefix=RESULT0 %s < %t.0 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS1 --test-arg %s --test-arg --input-file %s -o %t.1 2> %t.1.log +# RUN: FileCheck --check-prefix=RESULT1 %s < %t.1 + + +# RUN: llvm-reduce -abort-on-invalid-reduction -simplify-mir --delta-passes=basic-blocks -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS2 --test-arg %s --test-arg --input-file %s -o %t.2 2> %t.2.log +# RUN: FileCheck --check-prefix=RESULT2 %s < %t.2 + + + +# analyzeBranch fails on the SI_IF block, so stresses the alternative +# path that doesn't use removeBranch/insertBranch. 
+ +# Keep the first block +# CHECK-INTERESTINGNESS0: SI_IF + +# Keep the second block +# CHECK-INTERESTINGNESS1: V_MOV_B32_e32 3 + +# Keep the third block +# CHECK-INTERESTINGNESS2: S_ENDPGM + + + +# RESULT0: bb.0: +# RESULT0-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %2:sreg_64 = IMPLICIT_DEF +# RESULT0-NEXT: %5:vgpr_32 = IMPLICIT_DEF +# RESULT0-NEXT: %3:sreg_64 = SI_IF killed %2, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# RESULT0-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] +# RESULT0-NOT: bb.{{[0-9]+}} + +# RESULT0: bb.[[UNDEF_BB]]: +# RESULT0-NOT: V_MOV +# RESULT0-NOT: PHI +# RESULT0-NOT: S_ENDPGM + + + +# RESULT1: bb.0: +# RESULT1-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %2:sreg_64 = IMPLICIT_DEF +# RESULT1-NEXT: %5:vgpr_32 = IMPLICIT_DEF +# RESULT1-NEXT: %3:sreg_64 = SI_IF killed %2, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# RESULT1-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] +# RESULT1-NOT: bb.{{[0-9]+}} + +# RESULT1: bb.1: +# RESULT1-NEXT: %4:vgpr_32 = V_MOV_B32_e32 2, implicit $exec +# RESULT1-NEXT: %5:vgpr_32 = V_MOV_B32_e32 3, implicit $exec +# RESULT1-NEXT: %6:vgpr_32 = V_MOV_B32_e32 4, implicit $exec +# RESULT1-NEXT: S_NOP 0 +# RESULT1-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] +# RESULT1-NOT: bb.{{[0-9]+}} + +# RESULT1: bb.[[UNDEF_BB]]: +# RESULT1-NOT: V_MOV +# RESULT1-NOT: PHI +# RESULT1-NOT: S_ENDPGM + + + +# RESULT2: bb.0: +# RESULT2-NEXT: %0:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %1:vgpr_32 = IMPLICIT_DEF +# RESULT2-NEXT: %2:sreg_64 = IMPLICIT_DEF +# RESULT2-NEXT: %3:sreg_64 = SI_IF killed %2, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# RESULT2-NEXT: S_BRANCH %bb.[[UNDEF_BB:[0-9]+]] +# RESULT2-NOT: bb.{{[0-9]+}} + +# RESULT2: bb.[[UNDEF_BB]]: +# RESULT2-NEXT: %5:vgpr_32 = PHI %1, %bb.0 +# RESULT2-NEXT: S_ENDPGM 0, implicit %0, implicit %5, implicit %3 + +--- +name: 
analyze_branch_fails +tracksRegLiveness: true +body: | + bb.0: + S_WAITCNT 0 + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %2:sreg_64 = S_MOV_B64 -1 + %3:sreg_64 = SI_IF killed %2:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.1: + %4:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %5:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + %6:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + S_NOP 0 + + bb.2: + %5:vgpr_32 = PHI %1, %bb.0, %4, %bb.1 + S_ENDPGM 0, implicit %0, implicit %5, implicit %3 +... Index: llvm/tools/llvm-reduce/CMakeLists.txt =================================================================== --- llvm/tools/llvm-reduce/CMakeLists.txt +++ llvm/tools/llvm-reduce/CMakeLists.txt @@ -24,6 +24,7 @@ deltas/ReduceArguments.cpp deltas/ReduceAttributes.cpp deltas/ReduceBasicBlocks.cpp + deltas/ReduceBlocksMIR.cpp deltas/ReduceFunctionBodies.cpp deltas/ReduceFunctions.cpp deltas/ReduceGlobalObjects.cpp Index: llvm/tools/llvm-reduce/DeltaManager.cpp =================================================================== --- llvm/tools/llvm-reduce/DeltaManager.cpp +++ llvm/tools/llvm-reduce/DeltaManager.cpp @@ -19,6 +19,7 @@ #include "deltas/ReduceArguments.h" #include "deltas/ReduceAttributes.h" #include "deltas/ReduceBasicBlocks.h" +#include "deltas/ReduceBlocksMIR.h" #include "deltas/ReduceFunctionBodies.h" #include "deltas/ReduceFunctions.h" #include "deltas/ReduceGlobalObjects.h" @@ -72,6 +73,7 @@ DELTA_PASS("module-data", reduceModuleDataDeltaPass) #define DELTA_PASSES_MIR \ + DELTA_PASS("basic-blocks", reduceBlocksMIRDeltaPass) \ DELTA_PASS("instructions", reduceInstructionsMIRDeltaPass) \ DELTA_PASS("ir-instruction-references", \ reduceIRInstructionReferencesDeltaPass) \ Index: llvm/tools/llvm-reduce/ReducerWorkItem.cpp =================================================================== --- llvm/tools/llvm-reduce/ReducerWorkItem.cpp +++ 
llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -491,8 +491,13 @@ // Add for stack objects Score += MFI.getNumObjects(); + size_t NumBlocks = MF.size(); +#if 0 + if (MF.back().empty()) + --NumBlocks; +#endif // Add in the block count. - Score += 2 * MF.size(); + Score += 2 * NumBlocks; const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { Index: llvm/tools/llvm-reduce/deltas/ReduceBlocksMIR.h =================================================================== --- /dev/null +++ llvm/tools/llvm-reduce/deltas/ReduceBlocksMIR.h @@ -0,0 +1,23 @@ +//===- ReduceBlocksMIR.h - Specialized Delta Pass --------------*- c++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting MachineBasicBlocks from the MachineFunction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEBLOCKSMIR_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEBLOCKSMIR_H + +namespace llvm { +class TestRunner; + +void reduceBlocksMIRDeltaPass(TestRunner &Test); +} // namespace llvm + +#endif Index: llvm/tools/llvm-reduce/deltas/ReduceBlocksMIR.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-reduce/deltas/ReduceBlocksMIR.cpp @@ -0,0 +1,376 @@ +//===- ReduceBlocksMIR.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting MachineBasicBlocks from the MachineFunction. +// +//===----------------------------------------------------------------------===// + +#include "ReduceBlocksMIR.h" +#include "Delta.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/LivenessVerifier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define DEBUG_TYPE "llvm-reduce" + +using namespace llvm; + +using BlockSet = DenseSet; + +static bool isPseudoUndefBlock(const MachineBasicBlock &MBB) { + return MBB.empty() && &MBB == &MBB.getParent()->back() && MBB.succ_empty(); +} + +// We can't have a real undef block like in the IR, so treat an empty block at +// the end of the function as a pseudo undef block. +static MachineBasicBlock *getPseudoUndefBlock(MachineFunction &MF) { + if (isPseudoUndefBlock(MF.back())) { + assert(MF.back().getNumber() != -1); + return &MF.back(); + } + + MachineBasicBlock *UndefBlock = MF.CreateMachineBasicBlock(); + MF.addToMBBNumbering(UndefBlock); + MF.push_back(UndefBlock); + assert(UndefBlock->getNumber() != -1); + + LLVM_DEBUG(dbgs() << "Created new pseudo-undef block: " + << printMBBReference(*UndefBlock) << '\n'); + + return UndefBlock; +} + +static void updateCFG(const TargetInstrInfo &TII, MachineBasicBlock &MBB, + MachineBasicBlock *&UndefBlock, + const BlockSet &BBsToDelete) { + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBB = &MF->front(); + + // The successors aren't stored in any particular order, so we can't rely on + // early increment. 
+ SmallVector Succs(MBB.successors()); + for (MachineBasicBlock *Succ : Succs) { + if (BBsToDelete.count(Succ)) + MBB.removeSuccessor(Succ); + } + + SmallVector Preds(MBB.predecessors()); + for (MachineBasicBlock *Pred : Preds) { + if (Pred != EntryBB && BBsToDelete.count(Pred)) + Pred->removeSuccessor(&MBB); + } + + // TODO: May need to replace block address references elsewhere in function. + + // The successor list for this block didn't change, so we don't need to update + // the terminators. + if (MBB.succ_size() == Succs.size()) + return; + + // No point in updating the terminators if we're just going to delete all the + // instructions. + if (&MBB != EntryBB && BBsToDelete.count(&MBB)) + return; + + MachineBasicBlock *TBB = nullptr; + MachineBasicBlock *FBB = nullptr; + SmallVector Cond; + if (TII.analyzeBranch(MBB, TBB, FBB, Cond)) { + // If we don't understand this branch, replace deleted block references with + // the dummy undef block. + bool UsedUndefBlock = false; + + for (MachineInstr &Term : MBB.terminators()) { + for (MachineOperand &MO : Term.operands()) { + if (MO.isMBB() && BBsToDelete.count(MO.getMBB())) { + if (!UndefBlock) + UndefBlock = getPseudoUndefBlock(*MF); + + MO.setMBB(UndefBlock); + UsedUndefBlock = true; + } + } + } + + if (UsedUndefBlock && !MBB.isSuccessor(UndefBlock)) + MBB.addSuccessor(UndefBlock); + + return; + } + + // If we can analyze this branch, insert simplified terminators. + TII.removeBranch(MBB); + + MachineBasicBlock *NewSucc; + if (MBB.succ_empty()) { + // We don't have an API to insert a return, so insert a branch to fall off + // the end of the function. 
+ if (!UndefBlock) + UndefBlock = getPseudoUndefBlock(*MBB.getParent()); + MBB.addSuccessor(UndefBlock); + NewSucc = UndefBlock; + } else { + assert(MBB.succ_size() == 1); + NewSucc = *MBB.succ_begin(); + } + + TII.insertUnconditionalBranch(MBB, NewSucc, DebugLoc()); +} + +static unsigned getImpDefOpcode(const MachineRegisterInfo &MRI, Register Reg) { + return Reg.isVirtual() && MRI.getRegClassOrNull(Reg) == nullptr + ? TargetOpcode::G_IMPLICIT_DEF + : TargetOpcode::IMPLICIT_DEF; +} + +static int getBasicBlockIndex(const MachineInstr &Phi, + const MachineBasicBlock *BB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) { + if (Phi.getOperand(i + 1).getMBB() == BB) + return i; + } + return -1; +} + +static void removePredFromPhis(const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock &Pred) { + for (MachineInstr &Phi : make_early_inc_range(MBB.phis())) { + int Idx = getBasicBlockIndex(Phi, &Pred); + assert(Idx != -1); + + Phi.removeOperand(Idx + 1); + Phi.removeOperand(Idx); + + if (Phi.getNumOperands() == 1) + Phi.setDesc(TII.get(Phi.getOpcode() == TargetOpcode::PHI + ? 
TargetOpcode::IMPLICIT_DEF + : TargetOpcode::G_IMPLICIT_DEF)); + } +} + +static void collectTerminatorUses(const MachineBasicBlock &MBB, + SmallSet &Uses) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.readsReg()) + Uses.insert(MO.getReg()); + } + } +} + +static void collectTerminatorDefs(const MachineBasicBlock &MBB, + SmallSet &Defs) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef()) + Defs.insert(MO.getReg()); + } + } +} + +static void extractBlocksFromFunction(Oracle &O, MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + BlockSet BBsToDelete; + + LiveRegTracker LiveRegs; + LiveRegs.init(MF); + + // Unlike in the IR, we can't simply replace block values with undef so use a + // dummy, empty block inserted at the end of the function as a pseudo-undef + // block value. + MachineBasicBlock *UndefBlock = nullptr; + + // We're likely to need to use an empty block at the end as a dummy block + // replacement block. See if there's an existing block we can use for this. + if (isPseudoUndefBlock(MF.back())) + UndefBlock = &MF.back(); + + // Specially treat the entry block. We'll bulk delete the instructions it, but + // won't delete the block itself. + MachineBasicBlock *EntryBB = &*MF.begin(); + + MachineDominatorTree MDT; + MDT.runOnMachineFunction(MF); + + for (const MachineBasicBlock &MBB : MF) { + LiveRegs.enterBlock(&MBB); + + // Keep track of the current bundle header. 
+ const MachineInstr *CurBundle = nullptr; + + for (const MachineInstr &MI : MBB) { + if (!MI.isInsideBundle()) { + if (CurBundle) + LiveRegs.visitBundleAfter(*CurBundle); + CurBundle = &MI; + } + + if (!MI.isDebugInstr()) + LiveRegs.visitInstruction(MI); + } + + if (CurBundle) + LiveRegs.visitBundleAfter(*CurBundle); + + LiveRegs.exitBlock(&MBB); + } + + LiveRegs.calcRegsPassed(); + LiveRegs.calcRegsRequired(); + + for (MachineBasicBlock &MBB : MF) { + if (&MBB != UndefBlock && !O.shouldKeep()) + BBsToDelete.insert(&MBB); + } + + LLVM_DEBUG( + dbgs() << "Deleting blocks:"; + for (const MachineBasicBlock *MBB : BBsToDelete) + dbgs() << ' ' << printMBBReference(*MBB); + dbgs() << "\nKeeping blocks:"; + for (MachineBasicBlock &MBB : MF) { + if (!BBsToDelete.count(&MBB)) + dbgs() << ' ' << printMBBReference(MBB); + } + dbgs() << '\n'; + ); + + for (MachineBasicBlock *MBB : BBsToDelete) { + if (MBB == EntryBB) + continue; + + for (MachineBasicBlock *Succ : MBB->successors()) + removePredFromPhis(*TII, *Succ, *MBB); + } + + for (MachineBasicBlock &MBB : MF) + updateCFG(*TII, MBB, UndefBlock, BBsToDelete); + + SmallVector Defs; + + for (MachineBasicBlock &MBB : MF) { + // Delete in function order to get a stable instruction ordering for the + // replacement defs. + if (!BBsToDelete.count(&MBB)) + continue; + + if (&MBB == EntryBB) { + // Trash all the instructions, except the ones we need to maintain the + // CFG. + MBB.erase(MBB.begin(), MBB.getFirstTerminator()); + } else { + MBB.erase(MBB.begin(), MBB.end()); + } + + auto &RegsLiveOut = LiveRegs.MBBInfoMap[&MBB].regsLiveOut; + + MachineBasicBlock *ImpDefBB = nullptr; + MachineBasicBlock::iterator ImpDefInsertPt; + + if (MDT.isReachableFromEntry(&MBB)) { + ImpDefBB = &MBB; + + // Find a place to insert a replacement def that won't be deleted. + + // TODO: For post-SSA, avoid redefining existing values in predecessors. + do { + // Re-insert dummy defs we need to preserve liveness. 
+ if (MachineDomTreeNode *IDom = MDT.getNode(ImpDefBB)->getIDom()) { + assert(IDom->getBlock() != ImpDefBB); + + ImpDefBB = IDom->getBlock(); + ImpDefInsertPt = ImpDefBB->getFirstTerminator(); + } else { + assert(ImpDefBB == EntryBB); + ImpDefInsertPt = ImpDefBB->SkipPHIsLabelsAndDebug(ImpDefBB->begin()); + } + } while (ImpDefBB != EntryBB && is_contained(BBsToDelete, ImpDefBB)); + } + + if (!ImpDefBB) { + ImpDefBB = &MBB; + ImpDefInsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); + } + + SmallSet TermDefs; + collectTerminatorDefs(MBB, TermDefs); + + auto &ImpDefBBLiveness = LiveRegs.MBBInfoMap[ImpDefBB]; + + for (Register LiveOut : RegsLiveOut) { + // FIXME: We need to do something about physical registers and also + // potentially fixup the block liveins lists. + if (LiveOut.isPhysical()) + continue; + + if (TermDefs.count(LiveOut)) + continue; + + if (ImpDefBB != &MBB) { + // If this was already live out of the block we found, don't insert an + // additional def. + if (ImpDefBBLiveness.regsLiveOut.count(LiveOut) || + ImpDefBBLiveness.vregsPassed.count(LiveOut)) + continue; + } + + // Record that we now define this here in case other blocks also need to + // move a def here. + // FIXME: Need to propagate live through blocks? 
+ ImpDefBBLiveness.regsLiveOut.insert(LiveOut); + + unsigned ImpDef = getImpDefOpcode(MRI, LiveOut); + BuildMI(*ImpDefBB, ImpDefInsertPt, DebugLoc(), TII->get(ImpDef), LiveOut); + } + + if (&MBB == EntryBB) { + SmallSet TermUses; + collectTerminatorUses(MBB, TermUses); + + for (Register TermUse : TermUses) { + if (TermUse.isPhysical() && MRI.isReserved(TermUse)) + continue; + + if (ImpDefBBLiveness.regsLiveOut.count(TermUse)) + continue; + + unsigned ImpDef = getImpDefOpcode(MRI, TermUse); + BuildMI(*ImpDefBB, ImpDefInsertPt, DebugLoc(), TII->get(ImpDef), + TermUse); + } + } + } + + for (MachineBasicBlock *MBB : BBsToDelete) { + if (MBB != EntryBB) { + assert(MBB->succ_empty() && MBB->pred_empty() && + "deleting block with improperly updated CFG"); + MBB->eraseFromParent(); + } + } +} + +static void extractBlocksFromModule(Oracle &O, ReducerWorkItem &WorkItem) { + for (const Function &F : WorkItem.getModule()) { + if (auto *MF = WorkItem.MMI->getMachineFunction(F)) + extractBlocksFromFunction(O, *MF); + } +} + +void llvm::reduceBlocksMIRDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Blocks...\n"; + runDeltaPass(Test, extractBlocksFromModule); +}