Index: lib/CodeGen/RenameIndependentSubregs.cpp =================================================================== --- lib/CodeGen/RenameIndependentSubregs.cpp +++ lib/CodeGen/RenameIndependentSubregs.cpp @@ -243,6 +243,8 @@ unsigned VReg = Intervals[ID]->reg; MO.setReg(VReg); + if (MO.isTied()) + MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); } // TODO: We could attempt to recompute new register classes while visiting // the operands: Some of the split register may be fine with less constraint Index: test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir @@ -0,0 +1,91 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s +--- | + define amdgpu_kernel void @mac_invalid_operands(float %arg) #0 { + bb: + %tmp = fcmp oeq float %arg, 0.000000e+00 + br i1 %tmp, label %bb1, label %bb6 + + bb1: ; preds = %bb + %tmp2 = insertelement <3 x float> undef, float %arg, i32 0 + %tmp3 = shufflevector <3 x float> %tmp2, <3 x float> undef, <3 x i32> zeroinitializer + %tmp4 = fmul <3 x float> %tmp3, undef + %tmp5 = fadd <3 x float> %tmp4, undef + br label %bb6 + + bb6: ; preds = %bb1, %bb + %tmp7 = phi <3 x float> [ undef, %bb ], [ %tmp5, %bb1 ] + store volatile <3 x float> %tmp7, <3 x float> addrspace(1)* undef, align 16 + ret void + } + + attributes #0 = { norecurse nounwind } + +... 
+--- + +# GCN-LABEL: name: mac_invalid_operands +# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec + +name: mac_invalid_operands +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: sgpr_64 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: vgpr_32 } + - { id: 6, class: vgpr_32 } + - { id: 7, class: sreg_64 } + - { id: 8, class: vgpr_32 } + - { id: 9, class: vgpr_32 } + - { id: 10, class: vreg_64 } + - { id: 11, class: vreg_64 } + - { id: 12, class: vreg_128 } + - { id: 13, class: vreg_128 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vreg_64 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vreg_128 } +body: | + bb.0.bb: + successors: %bb.2.bb1(0x50000000), %bb.1(0x30000000) + + %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec + %vcc = COPY killed %7 + S_CBRANCH_VCCZ %bb.2.bb1, implicit killed %vcc + + bb.1: + successors: %bb.3.bb6(0x80000000) + + %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec + undef %12.sub0 = COPY killed %4 + %17 = COPY killed %12 + S_BRANCH %bb.3.bb6 + + bb.2.bb1: + successors: %bb.3.bb6(0x80000000) + + %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec + undef %13.sub0 = COPY %8 + %13.sub1 = COPY %8 + %13.sub2 = COPY killed %8 + %0 = COPY killed %13 + %17 = COPY killed %0 + + bb.3.bb6: + %1 = COPY killed %17 + FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr + %14 = COPY %1.sub1 + %16 = COPY killed %1.sub0 + undef %15.sub0 = COPY killed %16 + %15.sub1 = COPY killed %14 + FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM + +...