Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -2071,7 +2071,7 @@ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. - std::pair followCopyChain(const VNInfo *VNI) const; + std::pair followCopyChain(const VNInfo *VNI, bool &SubRangeDead) const; bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; @@ -2190,8 +2190,9 @@ } std::pair JoinVals::followCopyChain( - const VNInfo *VNI) const { + const VNInfo *VNI, bool &SubRangeDead) const { unsigned Reg = this->Reg; + SubRangeDead = false; while (!VNI->isPHIDef()) { SlotIndex Def = VNI->def; @@ -2222,6 +2223,13 @@ break; } } + if (ValueIn == nullptr && SubIdx == 0) { + // Didn't match a subrange so it must actually be dead + // Set the flag and let the caller deal with this scenario + //llvm_unreachable("Found a dead subrange"); + SubRangeDead = true; + break; + } if (ValueIn == nullptr) break; VNI = ValueIn; @@ -2234,19 +2242,20 @@ const JoinVals &Other) const { const VNInfo *Orig0; unsigned Reg0; - std::tie(Orig0, Reg0) = followCopyChain(Value0); - if (Orig0 == Value1) + bool SubRangeDead = false; + std::tie(Orig0, Reg0) = followCopyChain(Value0, SubRangeDead); + if (Orig0 == Value1 || SubRangeDead) return true; const VNInfo *Orig1; unsigned Reg1; - std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + std::tie(Orig1, Reg1) = Other.followCopyChain(Value1, SubRangeDead); // The values are equal if they are defined at the same place and use the // same register. Note that we cannot compare VNInfos directly as some of // them might be from a copy created in mergeSubRangeInto() while the other // is from the original LiveInterval. 
- return Orig0->def == Orig1->def && Reg0 == Reg1; + return (Orig0->def == Orig1->def && Reg0 == Reg1) || SubRangeDead; } JoinVals::ConflictResolution Index: test/CodeGen/AMDGPU/regcoalesce-subregjoin-fullcopy.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/regcoalesce-subregjoin-fullcopy.mir @@ -0,0 +1,362 @@ +# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# See bug http://llvm.org/PR33152 for details of the bug this test is checking +# for. +# This test will provoke a subrange join during simple register +# coalescing. Without a fix for PR33152 this causes an unreachable in SubRange +# Join +# +# The lines where the problem exhibits are the last 2 copy instructions in the +# BB (bb.25) +# +# GCN-LABEL: bb.10: +# GCN: successors: %bb.11(0x{{[0-9]+}}), %bb.22(0x{{[0-9]+}}) +# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit %exec +# + +--- | + ; Function Attrs: nounwind + define amdgpu_ps void @main() local_unnamed_addr #0 { + ret void + } + +... 
+--- +name: main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_128, preferred-register: '' } + - { id: 1, class: sreg_64, preferred-register: '' } + - { id: 2, class: sreg_128, preferred-register: '' } + - { id: 3, class: sreg_128, preferred-register: '' } + - { id: 4, class: sreg_128, preferred-register: '' } + - { id: 5, class: sreg_64, preferred-register: '' } + - { id: 6, class: sreg_64, preferred-register: '' } + - { id: 7, class: sreg_128, preferred-register: '' } + - { id: 8, class: sreg_128, preferred-register: '' } + - { id: 9, class: sreg_128, preferred-register: '' } + - { id: 10, class: sreg_64, preferred-register: '' } + - { id: 11, class: sreg_128, preferred-register: '' } + - { id: 12, class: sreg_128, preferred-register: '' } + - { id: 13, class: sreg_128, preferred-register: '' } + - { id: 14, class: sreg_128, preferred-register: '' } + - { id: 15, class: sreg_128, preferred-register: '' } + - { id: 16, class: sreg_128, preferred-register: '' } + - { id: 17, class: sreg_128, preferred-register: '' } + - { id: 18, class: sreg_128, preferred-register: '' } + - { id: 19, class: sreg_128, preferred-register: '' } + - { id: 20, class: sreg_128, preferred-register: '' } + - { id: 21, class: sreg_64, preferred-register: '' } + - { id: 22, class: sreg_128, preferred-register: '' } + - { id: 23, class: sreg_128, preferred-register: '' } + - { id: 24, class: sreg_64, preferred-register: '' } + - { id: 25, class: sreg_128, preferred-register: '' } + - { id: 26, class: sreg_128, preferred-register: '' } + - { id: 27, class: sreg_64, preferred-register: '' } + - { id: 28, class: sreg_32_xm0, preferred-register: '' } + - { id: 29, class: sreg_64, preferred-register: '' } + - { id: 30, class: sreg_32_xm0, preferred-register: '' } + - { id: 31, class: sreg_32_xm0, preferred-register: '' } + - { id: 32, class: sreg_32_xm0, preferred-register: '' } + 
- { id: 33, class: sreg_128, preferred-register: '' } + - { id: 34, class: vreg_128, preferred-register: '' } + - { id: 35, class: sreg_32_xm0, preferred-register: '' } + - { id: 36, class: sreg_32_xm0, preferred-register: '' } + - { id: 37, class: sreg_32_xm0, preferred-register: '' } + - { id: 38, class: sreg_32_xm0, preferred-register: '' } + - { id: 39, class: sreg_128, preferred-register: '' } + - { id: 40, class: vgpr_32, preferred-register: '' } + - { id: 41, class: sreg_64, preferred-register: '%vcc' } + - { id: 42, class: vgpr_32, preferred-register: '' } + - { id: 43, class: vgpr_32, preferred-register: '' } + - { id: 44, class: vgpr_32, preferred-register: '' } + - { id: 45, class: sreg_64, preferred-register: '%vcc' } + - { id: 46, class: sreg_64, preferred-register: '' } + - { id: 47, class: sreg_32_xm0, preferred-register: '' } + - { id: 48, class: vgpr_32, preferred-register: '' } + - { id: 49, class: vgpr_32, preferred-register: '' } + - { id: 50, class: vgpr_32, preferred-register: '' } + - { id: 51, class: vgpr_32, preferred-register: '' } + - { id: 52, class: sreg_32_xm0, preferred-register: '' } + - { id: 53, class: vgpr_32, preferred-register: '' } + - { id: 54, class: sreg_32_xm0, preferred-register: '' } + - { id: 55, class: sreg_32_xm0, preferred-register: '' } + - { id: 56, class: sreg_128, preferred-register: '' } + - { id: 57, class: sreg_32_xm0, preferred-register: '' } + - { id: 58, class: sreg_32_xm0, preferred-register: '' } + - { id: 59, class: sreg_32_xm0, preferred-register: '' } + - { id: 60, class: sreg_128, preferred-register: '' } + - { id: 61, class: sreg_32_xm0, preferred-register: '' } + - { id: 62, class: sreg_32_xm0, preferred-register: '' } + - { id: 63, class: vgpr_32, preferred-register: '' } + - { id: 64, class: sreg_64, preferred-register: '%vcc' } + - { id: 65, class: vgpr_32, preferred-register: '' } + - { id: 66, class: vgpr_32, preferred-register: '' } + - { id: 67, class: vgpr_32, preferred-register: '' } + - { 
id: 68, class: vgpr_32, preferred-register: '' } + - { id: 69, class: vgpr_32, preferred-register: '' } + - { id: 70, class: sreg_64, preferred-register: '%vcc' } + - { id: 71, class: sreg_64, preferred-register: '' } + - { id: 72, class: sreg_128, preferred-register: '' } + - { id: 73, class: sreg_32_xm0, preferred-register: '' } + - { id: 74, class: vgpr_32, preferred-register: '' } + - { id: 75, class: vgpr_32, preferred-register: '' } + - { id: 76, class: vgpr_32, preferred-register: '' } + - { id: 77, class: vgpr_32, preferred-register: '' } + - { id: 78, class: sreg_128, preferred-register: '' } + - { id: 79, class: sreg_128, preferred-register: '' } + - { id: 80, class: sreg_32_xm0, preferred-register: '' } + - { id: 81, class: vgpr_32, preferred-register: '' } + - { id: 82, class: vgpr_32, preferred-register: '' } + - { id: 83, class: vgpr_32, preferred-register: '' } + - { id: 84, class: vgpr_32, preferred-register: '' } + - { id: 85, class: vgpr_32, preferred-register: '' } + - { id: 86, class: vgpr_32, preferred-register: '' } + - { id: 87, class: vgpr_32, preferred-register: '' } + - { id: 88, class: vgpr_32, preferred-register: '' } + - { id: 89, class: vgpr_32, preferred-register: '' } + - { id: 90, class: vgpr_32, preferred-register: '' } + - { id: 91, class: vgpr_32, preferred-register: '' } + - { id: 92, class: vgpr_32, preferred-register: '' } + - { id: 93, class: vgpr_32, preferred-register: '' } + - { id: 94, class: vgpr_32, preferred-register: '' } + - { id: 95, class: sreg_128, preferred-register: '' } + - { id: 96, class: vgpr_32, preferred-register: '' } + - { id: 97, class: vgpr_32, preferred-register: '' } + - { id: 98, class: vgpr_32, preferred-register: '' } + - { id: 99, class: vgpr_32, preferred-register: '' } + - { id: 100, class: vgpr_32, preferred-register: '' } + - { id: 101, class: vgpr_32, preferred-register: '' } + - { id: 102, class: vreg_128, preferred-register: '' } + - { id: 103, class: vreg_128, preferred-register: '' } + - 
{ id: 104, class: vreg_128, preferred-register: '' } + - { id: 105, class: vreg_128, preferred-register: '' } + - { id: 106, class: vreg_128, preferred-register: '' } + - { id: 107, class: vreg_128, preferred-register: '' } + - { id: 108, class: vreg_128, preferred-register: '' } + - { id: 109, class: vgpr_32, preferred-register: '' } + - { id: 110, class: vreg_128, preferred-register: '' } + - { id: 111, class: vreg_128, preferred-register: '' } + - { id: 112, class: vreg_128, preferred-register: '' } + - { id: 113, class: vgpr_32, preferred-register: '' } + - { id: 114, class: vreg_128, preferred-register: '' } + - { id: 115, class: vgpr_32, preferred-register: '' } + - { id: 116, class: vreg_128, preferred-register: '' } + - { id: 117, class: vreg_128, preferred-register: '' } + - { id: 118, class: vgpr_32, preferred-register: '' } + - { id: 119, class: vreg_128, preferred-register: '' } + - { id: 120, class: vreg_128, preferred-register: '' } + - { id: 121, class: vreg_128, preferred-register: '' } + - { id: 122, class: vgpr_32, preferred-register: '' } + - { id: 123, class: vgpr_32, preferred-register: '' } + - { id: 124, class: vreg_128, preferred-register: '' } + - { id: 125, class: vgpr_32, preferred-register: '' } + - { id: 126, class: vgpr_32, preferred-register: '' } + - { id: 127, class: vgpr_32, preferred-register: '' } + - { id: 128, class: vgpr_32, preferred-register: '' } + - { id: 129, class: vreg_128, preferred-register: '' } + - { id: 130, class: vgpr_32, preferred-register: '' } + - { id: 131, class: vgpr_32, preferred-register: '' } + - { id: 132, class: vgpr_32, preferred-register: '' } + - { id: 133, class: vreg_128, preferred-register: '' } + - { id: 134, class: vreg_128, preferred-register: '' } + - { id: 135, class: vreg_128, preferred-register: '' } + - { id: 136, class: vgpr_32, preferred-register: '' } + - { id: 137, class: vgpr_32, preferred-register: '' } + - { id: 138, class: vgpr_32, preferred-register: '' } + - { id: 139, class: 
vgpr_32, preferred-register: '' } + - { id: 140, class: vgpr_32, preferred-register: '' } + - { id: 141, class: vreg_128, preferred-register: '' } + - { id: 142, class: vreg_128, preferred-register: '' } + - { id: 143, class: vreg_128, preferred-register: '' } + - { id: 144, class: vgpr_32, preferred-register: '' } + - { id: 145, class: vreg_128, preferred-register: '' } + - { id: 146, class: vreg_128, preferred-register: '' } + - { id: 147, class: vreg_128, preferred-register: '' } + - { id: 148, class: vreg_128, preferred-register: '' } + - { id: 149, class: sreg_128, preferred-register: '' } + - { id: 150, class: vreg_128, preferred-register: '' } + - { id: 151, class: sreg_64, preferred-register: '' } + - { id: 152, class: vreg_128, preferred-register: '' } + - { id: 153, class: sreg_64, preferred-register: '' } + - { id: 154, class: vreg_128, preferred-register: '' } + - { id: 155, class: vreg_128, preferred-register: '' } + - { id: 156, class: vreg_128, preferred-register: '' } + - { id: 157, class: vreg_128, preferred-register: '' } + - { id: 158, class: vreg_128, preferred-register: '' } + - { id: 159, class: sreg_64, preferred-register: '' } + - { id: 160, class: sreg_64, preferred-register: '' } + - { id: 161, class: sreg_128, preferred-register: ''} + - { id: 162, class: sreg_32, preferred-register: ''} +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0: + S_CBRANCH_SCC1 %bb.3, implicit undef %scc + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_SCC1 %bb.3, implicit undef %scc + S_BRANCH %bb.3 + + bb.3: + S_CBRANCH_SCC1 %bb.6, implicit undef %scc + S_BRANCH %bb.4 + + bb.4: + 
S_CBRANCH_SCC1 %bb.6, implicit undef %scc + S_BRANCH %bb.6 + + bb.6: + S_CBRANCH_SCC0 %bb.8, implicit undef %scc + + bb.7: + %156 = IMPLICIT_DEF + S_BRANCH %bb.15 + + bb.8: + S_CBRANCH_SCC0 %bb.10, implicit undef %scc + + bb.9: + %150 = IMPLICIT_DEF + S_BRANCH %bb.11 + + bb.10: + %24 = S_MOV_B64 0 + %31 = S_MOV_B32 61440 + %32 = S_MOV_B32 -1 + %151 = COPY killed %24 + %152 = IMPLICIT_DEF + S_BRANCH %bb.12 + + bb.11: + %102 = COPY killed %150 + %148 = COPY killed %102 + %156 = COPY killed %148 + S_BRANCH %bb.15 + + bb.12: + %119 = COPY killed %152 + %1 = COPY killed %151 + undef %33.sub2 = COPY %32 + %33.sub3 = COPY %31 + %34 = BUFFER_LOAD_DWORDX4_OFFSET killed %33, 0, 0, 0, 0, 0, implicit %exec + undef %124.sub1 = COPY %119.sub1 + %124.sub2 = COPY %119.sub2 + %41 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %34.sub3, 0, implicit %exec + %43 = V_ADD_F32_e32 1065353216, undef %44, implicit %exec + %45 = V_CMP_GT_F32_e64 0, 0, 0, killed %43, 0, implicit %exec + %46 = S_AND_B64 killed %45, killed %41, implicit-def dead %scc + %153 = COPY %1 + %154 = COPY %124 + %155 = IMPLICIT_DEF + %159 = COPY %exec, implicit-def %exec + %160 = S_AND_B64 %159, %46, implicit-def dead %scc + %5 = S_XOR_B64 %160, %159, implicit-def dead %scc + %exec = S_MOV_B64_term killed %160 + SI_MASK_BRANCH %bb.13, implicit %exec + S_BRANCH %bb.25 + + bb.13: + %exec = S_OR_B64 %exec, %5, implicit-def %scc + %133 = COPY killed %155 + %135 = COPY killed %154 + %6 = COPY killed %153 + %10 = S_OR_B64 %5, killed %6, implicit-def dead %scc + %151 = COPY %10 + %152 = COPY killed %133 + %exec = S_ANDN2_B64_term %exec, %10 + S_CBRANCH_EXECNZ %bb.12, implicit %exec + S_BRANCH %bb.14 + + bb.14: + %exec = S_OR_B64 %exec, killed %10, implicit-def %scc + %104 = COPY killed %135 + %150 = COPY killed %104 + S_BRANCH %bb.11 + + bb.15: + %106 = COPY killed %156 + S_BRANCH %bb.16 + + bb.16: + S_CBRANCH_SCC0 %bb.18, implicit undef %scc + + bb.17: + %157 = IMPLICIT_DEF + S_BRANCH %bb.21 + + bb.18: + 
S_CBRANCH_SCC1 %bb.20, implicit undef %scc + S_BRANCH %bb.20 + + bb.20: + %74 = V_MUL_F32_e32 undef %75, killed %106.sub1, implicit %exec + %76 = V_MUL_F32_e32 undef %77, killed %74, implicit %exec + undef %141.sub0 = COPY %76 + %157 = COPY killed %141 + + bb.21: + %142 = COPY killed %157 + S_CBRANCH_SCC0 %bb.23, implicit undef %scc + + bb.22: + %158 = IMPLICIT_DEF + S_BRANCH %bb.24 + + bb.23: + %81 = V_FMA_F32 0, undef %82, 0, killed %142.sub0, 0, undef %83, 0, 0, implicit %exec + %84 = V_FMA_F32 0, undef %85, 0, undef %86, 0, killed %81, 0, 0, implicit %exec + %87 = V_FMA_F32 0, undef %88, 0, undef %89, 0, killed %84, 0, 0, implicit %exec + dead %90 = V_MUL_F32_e32 undef %91, killed %87, implicit %exec + %92 = V_MUL_F32_e32 undef %94, undef %93, implicit %exec + undef %145.sub1 = COPY %92 + %158 = COPY killed %145 + + bb.24: + %146 = COPY killed %158 + %98 = V_FMA_F32 0, killed %146.sub1, 0, target-flags(amdgpu-gotprel32-lo) 1056964608, 0, 1056964608, 0, 0, implicit %exec + EXP 1, undef %99, killed %98, undef %100, undef %101, -1, 0, 15, implicit %exec + S_ENDPGM + + bb.25: + dead %48 = V_FMA_F32 0, killed %119.sub2, 0, undef %49, 0, undef %50, 0, 0, implicit %exec + dead %51 = BUFFER_LOAD_DWORD_OFFEN undef %53, undef %161, undef %162, 0, 0, 0, 0, implicit %exec + undef %129.sub1 = COPY %124.sub1 + %129.sub2 = COPY %124.sub2 + %64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %65, 0, implicit %exec + %67 = V_ADD_F32_e32 1065353216, undef %68, implicit %exec + %69 = V_ADD_F32_e32 1065353216, killed %67, implicit %exec + %70 = V_CMP_NGT_F32_e64 0, 0, 0, killed %69, 0, implicit %exec + %71 = S_OR_B64 killed %70, killed %64, implicit-def dead %scc + %21 = S_OR_B64 killed %71, killed %1, implicit-def dead %scc + %153 = COPY killed %21 + %154 = COPY %129 + %155 = COPY killed %129 + S_BRANCH %bb.13 + +...