diff --git a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir @@ -0,0 +1,166 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s + +# Check that liverange splitting does not create copies that overlap within a bundle. +# By overlap, we mean that they write to the same subregisters. +# e.g. the following bundle is desirable +# %0.sub1_sub2 = COPY ... { +# %0.sub3 = COPY ... +# } +# but the following bundle isn't desirable as the overlap of the copies can make +# virtregrewriter fail due to cycles in the copy bundle. +# %0.sub1_sub2 = COPY ... { +# %0.sub2_sub3 = COPY ... +# } +--- +name: split_liverange_copy_overlap_31 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: split_liverange_copy_overlap_31 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_1024_align2 = COPY [[DEF1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK-NEXT: } + ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit %6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:av_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: undef %4.sub0:vreg_1024_align2 = COPY [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit %4 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024_align2 = IMPLICIT_DEF + %2:vreg_1024_align2 = COPY %1 + + bb.1: + %5:vreg_64 = IMPLICIT_DEF + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.2: + %2.sub0:vreg_1024_align2 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0 + + bb.3: + S_NOP 0, implicit %2 + + bb.4: + %2:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.5: + undef %4.sub0:vreg_1024_align2 = COPY %0 + S_NOP 0, implicit %4 +... +--- +name: split_liverange_copy_overlap_30 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: split_liverange_copy_overlap_30 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_1024 = COPY [[DEF1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30:av_1024 = COPY [[COPY]].sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30 + ; CHECK-NEXT: } + ; CHECK-NEXT: %6.sub0:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: %6.sub31:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit %6.sub0, implicit %6.sub31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit %6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: undef %4.sub0:vreg_1024 = COPY [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit %4 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = COPY %1 + + bb.1: + %5:vreg_64 = IMPLICIT_DEF + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.2: + %2.sub0:vreg_1024 = IMPLICIT_DEF + %2.sub31:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0, implicit %2.sub31 + + bb.3: + S_NOP 0, implicit %2 + + bb.4: + %2:vreg_1024 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.5: + undef %4.sub0:vreg_1024 = COPY %0 + S_NOP 0, implicit %4 +...