Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -1496,6 +1496,10 @@ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + // This optimization is valid only for a copy that writes the whole register. + if (DstSubIdx) + return nullptr; + SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. Index: test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir @@ -0,0 +1,165 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# This is another example of a test giving "Couldn't join subrange!" +# +# This shows a case where a subreg was defined by a copy from an undef register; +# that copy was removed, so the subreg became undef at a later whole-register use. 
+# +# GCN: {{^body}} + +--- +name: _amdgpu_ps_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + + %23:sreg_32_xm0 = S_MOV_B32 0 + undef %24.sub0:sreg_128 = COPY %23 + %24.sub1:sreg_128 = COPY %23 + %24.sub3:sreg_128 = COPY %23 + %110:sreg_128 = COPY killed %24 + S_CBRANCH_SCC1 %bb.2, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + %110:sreg_128 = IMPLICIT_DEF + + bb.2: + successors: %bb.3(0x40000000), %bb.4(0x40000000) + + %0:sreg_128 = COPY killed %110 + S_CBRANCH_SCC1 %bb.4, implicit undef $scc + S_BRANCH %bb.3 + + bb.3: + successors: %bb.5(0x80000000) + + %88:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %87.sub2:vreg_128 = COPY killed %88 + %43:sreg_32_xm0 = S_MOV_B32 0 + undef %44.sub0:sreg_256 = COPY %43 + %44.sub1:sreg_256 = COPY %43 + %44.sub2:sreg_256 = COPY %43 + %44.sub3:sreg_256 = COPY %43 + %44.sub4:sreg_256 = COPY %43 + %44.sub5:sreg_256 = COPY %43 + %44.sub6:sreg_256 = COPY %43 + %44.sub7:sreg_256 = COPY killed %43 + %45:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 killed %87, killed %44, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + dead %89:vgpr_32 = V_BFE_U32 killed %45, 7, 1, implicit $exec + %49:sreg_32_xm0 = S_MOV_B32 1 + %50:sreg_128 = COPY killed %0 + %50.sub2:sreg_128 = COPY killed %49 + %91:vreg_128 = COPY killed %50 + %90:vreg_128 = COPY killed %91 + 
%90.sub3:vreg_128 = COPY undef %89 + %111:vreg_128 = COPY killed %90 + S_BRANCH %bb.5 + + bb.4: + successors: %bb.5(0x80000000) + + %34:sreg_32_xm0 = S_MOV_B32 0 + %4:sreg_128 = COPY killed %0 + %4.sub2:sreg_128 = COPY killed %34 + %93:vreg_128 = COPY killed %4 + %111:vreg_128 = COPY killed %93 + + bb.5: + successors: %bb.8(0x40000000), %bb.6(0x40000000) + + %92:vreg_128 = COPY killed %111 + %108:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %107.sub2:vreg_128 = COPY killed %108 + %60:sgpr_32 = S_MOV_B32 0 + undef %61.sub0:sreg_256 = COPY %60 + %61.sub1:sreg_256 = COPY %60 + %61.sub2:sreg_256 = COPY %60 + %61.sub3:sreg_256 = COPY %60 + %61.sub4:sreg_256 = COPY %60 + %61.sub5:sreg_256 = COPY %60 + %61.sub6:sreg_256 = COPY %60 + %61.sub7:sreg_256 = COPY killed %60 + %62:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 killed %107, killed %61, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %109:vgpr_32 = V_AND_B32_e32 24, killed %62, implicit $exec + %67:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %109, implicit $exec + dead %69:vgpr_32 = V_CNDMASK_B32_e64 0, -1, %67, implicit $exec + %112:vreg_128 = IMPLICIT_DEF + %114:sreg_64 = COPY $exec, implicit-def $exec + %115:sreg_64 = S_AND_B64 %114, %67, implicit-def dead $scc + %8:sreg_64 = S_XOR_B64 %115, %114, implicit-def dead $scc + $exec = S_MOV_B64_term killed %115 + SI_MASK_BRANCH %bb.6, implicit $exec + S_BRANCH %bb.8 + + bb.6: + successors: %bb.7(0x40000000), %bb.9(0x40000000) + + %116:sreg_64 = COPY killed %8 + %10:sreg_64 = S_OR_SAVEEXEC_B64 %116, implicit-def $exec, implicit-def $scc, implicit $exec + %105:vreg_128 = COPY killed %112 + %113:vreg_128 = COPY killed %105 + $exec = S_XOR_B64_term $exec, %10, implicit-def $scc + SI_MASK_BRANCH %bb.9, implicit $exec + S_BRANCH %bb.7 + + bb.7: + successors: %bb.9(0x80000000) + + dead %97:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %95.sub1:vreg_128 = COPY killed %92.sub1 + %113:vreg_128 = COPY killed %95 + S_BRANCH 
%bb.9 + + bb.8: + successors: %bb.6(0x80000000) + + %103:vreg_128 = COPY %92 + %103.sub2:vreg_128 = COPY undef %69 + %104:vreg_128 = COPY killed %103 + %104.sub3:vreg_128 = COPY undef %69 + %112:vreg_128 = COPY killed %104 + S_BRANCH %bb.6 + + bb.9: + $exec = S_OR_B64 $exec, killed %10, implicit-def $scc + %99:vreg_128 = COPY killed %113 + %77:sreg_64_xexec = V_CMP_EQ_U32_e64 target-flags(amdgpu-gotprel32-lo) 0, killed %99.sub1, implicit $exec + %79:vgpr_32 = V_CNDMASK_B32_e64 0, 1065353216, killed %77, implicit $exec + %81:vgpr_32 = V_MUL_F32_e32 0, killed %79, implicit $exec + %82:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %83:vgpr_32, 0, killed %81, 0, 0, implicit $exec + EXP_DONE 0, undef %84:vgpr_32, killed %82, undef %85:vgpr_32, undef %86:vgpr_32, -1, -1, 15, implicit $exec + S_ENDPGM + +...