Index: lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- lib/Target/AMDGPU/SIFoldOperands.cpp +++ lib/Target/AMDGPU/SIFoldOperands.cpp @@ -35,13 +35,16 @@ uint64_t ImmToFold; int FrameIndexToFold; }; + int ShrinkOpcode; unsigned char UseOpNo; MachineOperand::MachineOperandType Kind; bool Commuted; FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, - bool Commuted_ = false) : - UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()), + bool Commuted_ = false, + int ShrinkOp = -1) : + UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), + Kind(FoldOp->getType()), Commuted(Commuted_) { if (FoldOp->isImm()) { ImmToFold = FoldOp->getImm(); @@ -68,6 +71,14 @@ bool isCommuted() const { return Commuted; } + + bool needsShrink() const { + return ShrinkOpcode != -1; + } + + int getShrinkOpcode() const { + return ShrinkOpcode; + } }; class SIFoldOperands : public MachineFunctionPass { @@ -154,6 +165,7 @@ } static bool updateOperand(FoldCandidate &Fold, + const SIInstrInfo &TII, const TargetRegisterInfo &TRI) { MachineInstr *MI = Fold.UseMI; MachineOperand &Old = MI->getOperand(Fold.UseOpNo); @@ -189,10 +201,42 @@ Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); } } + + if (Fold.needsShrink()) { + MachineBasicBlock *MBB = MI->getParent(); + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); + if (Liveness != MachineBasicBlock::LQR_Dead) + return false; + + int Op32 = Fold.getShrinkOpcode(); + MachineOperand &Dst0 = MI->getOperand(0); + MachineOperand &Dst1 = MI->getOperand(1); + assert(Dst0.isDef() && Dst1.isDef()); + + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); + unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); + const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); + unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); + + MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); + + // Keep the old instruction around to avoid breaking iterators, but + // replace the outputs with dummy registers. + Dst0.setReg(NewReg0); + Dst1.setReg(NewReg1); + + if (Fold.isCommuted()) + TII.commuteInstruction(*Inst32, false); + return true; + } + Old.ChangeToImmediate(Fold.ImmToFold); return true; } + assert(!Fold.needsShrink() && "not handled"); + if (Fold.isFI()) { Old.ChangeToFrameIndex(Fold.FrameIndexToFold); return true; @@ -261,6 +305,8 @@ if (isUseMIInFoldList(FoldList, MI)) return false; + unsigned CommuteOpNo = OpNo; + // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex; @@ -269,11 +315,12 @@ if (CanCommute) { if (CommuteIdx0 == OpNo) - OpNo = CommuteIdx1; + CommuteOpNo = CommuteIdx1; else if (CommuteIdx1 == OpNo) - OpNo = CommuteIdx0; + CommuteOpNo = CommuteIdx0; } + // One of operands might be an Imm operand, and OpNo may refer to it after // the call of commuteInstruction() below. 
Such situations are avoided // here explicitly as OpNo must be a register operand to be a candidate @@ -286,12 +333,39 @@ !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) return false; - if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) { + if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) { + if ((Opc == AMDGPU::V_ADD_I32_e64 || + Opc == AMDGPU::V_SUB_I32_e64 || + Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME + OpToFold->isImm()) { + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + // Verify the other operand is a VGPR, otherwise we would violate the + // constant bus restriction. + unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0; + MachineOperand &OtherOp = MI->getOperand(OtherIdx); + if (!OtherOp.isReg() || + !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg())) + return false; + + const MachineOperand &SDst = MI->getOperand(1); + assert(SDst.isDef()); + + // TODO: Handle cases with a used carry. + if (!MRI.use_nodbg_empty(SDst.getReg())) + return false; + + int Op32 = AMDGPU::getVOPe32(Opc); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, + Op32)); + return true; + } + TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1); return false; } - FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true)); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true)); return true; } @@ -757,7 +831,7 @@ Copy->addImplicitDefUseOperands(*MF); for (FoldCandidate &Fold : FoldList) { - if (updateOperand(Fold, *TRI)) { + if (updateOperand(Fold, *TII, *TRI)) { // Clear kill flags. if (Fold.isReg()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -694,6 +694,9 @@ bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; + MachineInstr *buildShrunkInst(MachineInstr &MI, + unsigned NewOpcode) const; + bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2572,7 +2572,60 @@ // Check output modifiers return !hasModifiersSet(MI, AMDGPU::OpName::omod) && !hasModifiersSet(MI, AMDGPU::OpName::clamp); +} + +// Set VCC operand with all flags from \p Orig, except for setting it as +// implicit. +static void copyFlagsToImplicitVCC(MachineInstr &MI, + const MachineOperand &Orig) { + + for (MachineOperand &Use : MI.implicit_operands()) { + if (Use.isUse() && Use.getReg() == AMDGPU::VCC) { + Use.setIsUndef(Orig.isUndef()); + Use.setIsKill(Orig.isKill()); + return; + } + } +} + +MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, + unsigned Op32) const { + MachineBasicBlock *MBB = MI.getParent();; + MachineInstrBuilder Inst32 = + BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)); + + // Add the dst operand if the 32-bit encoding also has an explicit $vdst. + // For VOPC instructions, this is replaced by an implicit def of vcc. 
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); + if (Op32DstIdx != -1) { + // dst + Inst32.add(MI.getOperand(0)); + } else { + assert(MI.getOperand(0).getReg() == AMDGPU::VCC && + "Unexpected case"); + } + + Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0)); + + const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); + if (Src1) + Inst32.add(*Src1); + + const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); + + if (Src2) { + int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); + if (Op32Src2Idx != -1) { + Inst32.add(*Src2); + } else { + // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is + // replaced with an implicit read of vcc. This was already added + // during the initial BuildMI, so find it to preserve the flags. + copyFlagsToImplicitVCC(*Inst32, *Src2); + } + } + return Inst32; } bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp =================================================================== --- lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -120,19 +120,6 @@ return false; } -// Copy MachineOperand with all flags except setting it as implicit. -static void copyFlagsToImplicitVCC(MachineInstr &MI, - const MachineOperand &Orig) { - - for (MachineOperand &Use : MI.implicit_operands()) { - if (Use.isUse() && Use.getReg() == AMDGPU::VCC) { - Use.setIsUndef(Orig.isUndef()); - Use.setIsKill(Orig.isKill()); - return; - } - } -} - static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { return isInt<16>(Src.getImm()) && !TII->isInlineConstant(*Src.getParent(), @@ -232,7 +219,6 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo &TRI = TII->getRegisterInfo(); std::vector I1Defs; @@ -435,40 +421,7 @@ // We can shrink this instruction LLVM_DEBUG(dbgs() << "Shrinking " << MI); - MachineInstrBuilder Inst32 = - BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32)); - - // Add the dst operand if the 32-bit encoding also has an explicit $vdst. - // For VOPC instructions, this is replaced by an implicit def of vcc. - int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); - if (Op32DstIdx != -1) { - // dst - Inst32.add(MI.getOperand(0)); - } else { - assert(MI.getOperand(0).getReg() == AMDGPU::VCC && - "Unexpected case"); - } - - - Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0)); - - const MachineOperand *Src1 = - TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (Src1) - Inst32.add(*Src1); - - if (Src2) { - int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); - if (Op32Src2Idx != -1) { - Inst32.add(*Src2); - } else { - // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is - // replaced with an implicit read of vcc. This was already added - // during the initial BuildMI, so find it to preserve the flags. - copyFlagsToImplicitVCC(*Inst32, *Src2); - } - } - + MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32); ++NumInstructionsShrunk; // Copy extra operands not present in the instruction definition. 
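The MIR tests below exercise this fold end-to-end. In outline: when one of V_ADD_I32_e64 / V_SUB_I32_e64 / V_SUBREV_I32_e64 has an immediate operand that is not legal for the VOP3 encoding even after commuting, the other source is a VGPR, the carry-out has no non-debug uses, and VCC is dead at the instruction, SIFoldOperands now shrinks the instruction to its e32 form (which implicitly clobbers $vcc). A minimal before/after sketch, taken from the first test in fold-immediate-operand-shrink.mir (virtual register numbers after the passes are only illustrative):

  # Input:
  %0:sreg_32_xm0 = S_MOV_B32 12345
  %1:vgpr_32 = IMPLICIT_DEF
  %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
  S_ENDPGM implicit %2

  # Expected result (per the GCN CHECK lines): the unused carry-out %3 is gone
  # and the add now clobbers $vcc, which is why updateOperand must first prove
  # VCC is dead at this point.
  %0:sreg_32_xm0 = S_MOV_B32 12345
  %1:vgpr_32 = IMPLICIT_DEF
  %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec
  S_ENDPGM implicit %2
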
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s + +--- + +# Uses a carry out in an instruction that can't be shrunk. + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %5 + +... +--- + +# TODO: Is it OK to leave the broken use around on the DBG_VALUE? + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: DBG_VALUE debug-use %5:sreg_64_xexec, debug-use $noreg + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec + DBG_VALUE debug-use %5, debug-use $noreg + S_ENDPGM implicit %4 + +... + +--- + +# Uses carry out in a normal pattern + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec + %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec + S_ENDPGM implicit %6 + +... 
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -0,0 +1,347 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s + +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32_xm0 = S_MOV_B32 12345 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... +--- + +# This does not shrink because it would violate the constant bus +# restriction. to have an SGPR input and an immediate, so a copy would +# be required. + +name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec + ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec + %1:sreg_32_xm0 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use + ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:sreg_32_xm0 = IMPLICIT_DEF + %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use + ; GCN: $vcc = S_MOV_B64 -1 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc + $vcc = S_MOV_B64 -1 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2, implicit $vcc + +... + +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $vcc = S_MOV_B64 -1 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: bb.1: + ; GCN: liveins: $vcc + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc + bb.0: + successors: %bb.1 + $vcc = S_MOV_B64 -1 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + + bb.1: + liveins: $vcc + S_ENDPGM implicit %2, implicit $vcc + +... +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: bb.1: + ; GCN: liveins: $vcc_lo + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + bb.0: + successors: %bb.1 + $vcc = S_MOV_B64 -1 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + + bb.1: + liveins: $vcc_lo + S_ENDPGM implicit %2, implicit $vcc_lo + +... +--- + +# This is not OK to clobber because vcc_lo has a livein use. 
+ +name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $vcc = S_MOV_B64 -1 + ; GCN: bb.1: + ; GCN: liveins: $vcc + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + bb.0: + successors: %bb.1 + $vcc = S_MOV_B64 -1 + + bb.1: + liveins: $vcc + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2, implicit $vcc_lo + +... +--- + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $vcc_hi = S_MOV_B32 -1 + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: liveins: $vcc_hi + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: bb.2: + ; GCN: liveins: $vcc_hi + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi + bb.0: + successors: %bb.1 + $vcc_hi = S_MOV_B32 -1 + + bb.1: + liveins: $vcc_hi + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + + bb.2: + liveins: $vcc_hi + + S_ENDPGM implicit %2, implicit $vcc_hi + +... + +--- + +name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32_xm0 = S_MOV_B32 12345 + %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use +tracksRegLiveness: true + +body: | + bb.0: + ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32_xm0 = S_MOV_B32 12345 + %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# We know this is OK because vcc isn't live out of the block, even +# though it had a defined value + +name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: bb.1: + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + bb.0: + successors: %bb.1 + + $vcc = S_MOV_B64 -1 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + + bb.1: + S_ENDPGM implicit %2 + +...