Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -208,12 +208,14 @@
       if (Liveness != MachineBasicBlock::LQR_Dead)
         return false;
 
+      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
       int Op32 = Fold.getShrinkOpcode();
       MachineOperand &Dst0 = MI->getOperand(0);
       MachineOperand &Dst1 = MI->getOperand(1);
       assert(Dst0.isDef() && Dst1.isDef());
 
-      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
       const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
       unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
       const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
@@ -221,6 +223,11 @@
 
       MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
 
+      if (HaveNonDbgCarryUse) {
+        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+          .addReg(AMDGPU::VCC, RegState::Kill);
+      }
+
       // Keep the old instruction around to avoid breaking iterators, but
       // replace the outputs with dummy registers.
       Dst0.setReg(NewReg0);
@@ -351,10 +358,6 @@
       const MachineOperand &SDst = MI->getOperand(1);
       assert(SDst.isDef());
 
-      // TODO: Handle cases with a used carry.
-      if (!MRI.use_nodbg_empty(SDst.getReg()))
-        return false;
-
       int Op32 = AMDGPU::getVOPe32(Opc);
       FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
                                        Op32));
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
===================================================================
--- test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
+++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
@@ -13,8 +13,9 @@
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
-    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
+    ; GCN: S_ENDPGM implicit [[COPY]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = IMPLICIT_DEF
@@ -26,6 +27,31 @@
 
 ...
 ---
 
+name: shrink_scalar_imm_multi_use_with_used_carry
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_multi_use_with_used_carry
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+    ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+
+    %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+    %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, implicit $exec
+    S_ENDPGM implicit %6, implicit %7
+
+...
+---
+
 # TODO: Is it OK to leave the broken use around on the DBG_VALUE?
 name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
@@ -64,8 +90,9 @@
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
-    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
+    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], implicit $exec
     ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF