diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -54,6 +54,7 @@
 class GCNDPPCombine : public MachineFunctionPass {
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
+  const GCNSubtarget *ST = nullptr;
 
   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
 
@@ -63,12 +64,14 @@
                               MachineInstr &MovMI,
                               RegSubRegPair CombOldVGPR,
                               MachineOperand *OldOpnd,
-                              bool CombBCZ) const;
+                              bool CombBCZ,
+                              bool IsShrinkable) const;
 
   MachineInstr *createDPPInst(MachineInstr &OrigMI,
                               MachineInstr &MovMI,
                               RegSubRegPair CombOldVGPR,
-                              bool CombBCZ) const;
+                              bool CombBCZ,
+                              bool IsShrinkable) const;
 
   bool hasNoImmOrEqual(MachineInstr &MI,
                        unsigned OpndName,
@@ -99,7 +102,8 @@
   }
 
 private:
-  int getDPPOp(unsigned Op) const;
+  int getDPPOp(unsigned Op, bool IsShrinkable) const;
+  bool isShrinkable(MachineInstr &OrigMI, unsigned OrigOp) const;
 };
 
 } // end anonymous namespace
@@ -114,11 +118,32 @@
   return new GCNDPPCombine();
 }
 
-int GCNDPPCombine::getDPPOp(unsigned Op) const {
+bool GCNDPPCombine::isShrinkable(MachineInstr &OrigMI, unsigned OrigOp) const {
+  if (!TII->isVOP3(OrigOp)) {
+    return false;
+  }
+  if (!TII->hasVALU32BitEncoding(OrigOp)) {
+    LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
+    return false;
+  }
+  // check if other than abs|neg modifiers are set (opsel for example)
+  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
+  if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
+      !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
+      !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
+      !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
+    LLVM_DEBUG(dbgs() << "  Inst has non-default modifiers\n");
+    return false;
+  }
+  return true;
+}
+
+int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
   auto DPP32 = AMDGPU::getDPPOp32(Op);
-  if (DPP32 == -1) {
+  if (IsShrinkable) {
+    assert(DPP32 == -1);
     auto E32 = AMDGPU::getVOPe32(Op);
-    DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
+    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
   }
   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
 }
@@ -151,12 +176,13 @@
 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                            MachineInstr &MovMI,
                                            RegSubRegPair CombOldVGPR,
-                                           bool CombBCZ) const {
+                                           bool CombBCZ,
+                                           bool IsShrinkable) const {
   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
 
   auto OrigOp = OrigMI.getOpcode();
-  auto DPPOp = getDPPOp(OrigOp);
+  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
   if (DPPOp == -1) {
     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
     return nullptr;
@@ -318,7 +344,8 @@
                                            MachineInstr &MovMI,
                                            RegSubRegPair CombOldVGPR,
                                            MachineOperand *OldOpndValue,
-                                           bool CombBCZ) const {
+                                           bool CombBCZ,
+                                           bool IsShrinkable) const {
   assert(CombOldVGPR.Reg);
   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
@@ -338,7 +365,7 @@
       return nullptr;
     }
   }
-  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
+  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
 }
 
 // returns true if MI doesn't have OpndName immediate operand or the
@@ -503,21 +530,8 @@
       continue;
     }
 
-    if (TII->isVOP3(OrigOp)) {
-      if (!TII->hasVALU32BitEncoding(OrigOp)) {
-        LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
-        break;
-      }
-      // check if other than abs|neg modifiers are set (opsel for example)
-      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
-      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
-          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
-          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
-          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
-        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
-        break;
-      }
-    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
+    bool IsShrinkable = isShrinkable(OrigMI, OrigOp);
+    if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
       break;
     }
@@ -542,7 +556,7 @@
     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
     if (Use == Src0) {
       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
-                                        OldOpndValue, CombBCZ)) {
+                                        OldOpndValue, CombBCZ, IsShrinkable)) {
         DPPMIs.push_back(DPPInst);
         Rollback = false;
       }
@@ -553,8 +567,9 @@
       BB->insert(OrigMI, NewMI);
       if (TII->commuteInstruction(*NewMI)) {
         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
-        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
-                                          OldOpndValue, CombBCZ)) {
+        if (auto *DPPInst =
+                createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
+                              IsShrinkable)) {
           DPPMIs.push_back(DPPInst);
           Rollback = false;
         }
@@ -587,12 +602,12 @@
 }
 
 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
-  auto &ST = MF.getSubtarget<GCNSubtarget>();
-  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
+  ST = &MF.getSubtarget<GCNSubtarget>();
+  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
     return false;
 
   MRI = &MF.getRegInfo();
-  TII = ST.getInstrInfo();
+  TII = ST->getInstrInfo();
 
   bool Changed = false;
   for (auto &MBB : MF) {
@@ -602,7 +617,7 @@
       Changed = true;
       ++NumDPPMovsCombined;
     } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
-      if (ST.has64BitDPP() && combineDPPMov(MI)) {
+      if (ST->has64BitDPP() && combineDPPMov(MI)) {
         Changed = true;
         ++NumDPPMovsCombined;
       } else {