Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -371,6 +371,30 @@
       MRI->getRegClass(DestReg) :
       TRI->getPhysRegClass(DestReg);
 
+    unsigned SrcReg = UseMI->getOperand(1).getReg();
+    if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+      if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+        MachineRegisterInfo::use_iterator NextUse;
+        SmallVector<FoldCandidate, 4> CopyUses;
+        for (MachineRegisterInfo::use_iterator
+               Use = MRI->use_begin(DestReg), E = MRI->use_end();
+             Use != E; Use = NextUse) {
+          NextUse = std::next(Use);
+          FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(), &UseMI->getOperand(1));
+          CopyUses.push_back(FC);
+          //foldOperand(UseMI->getOperand(1), Use->getParent(), Use.getOperandNo(), FoldList, CopiesToReplace);
+        }
+        for (auto &F : CopyUses) {
+          foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
+        }
+      }
+    }
+
+    // In order to fold immediates into copies, we need to change the
+    // copy to a MOV.
+
     unsigned MovOp = TII->getMovOpcode(DestRC);
     if (MovOp == AMDGPU::COPY)
       return;
@@ -689,6 +713,17 @@
       MachineInstr *UseMI = Use->getParent();
       unsigned OpNo = Use.getOperandNo();
 
+      // If we have a choice between folding the frame index or an
+      // immediate, prefer the immediate.
+      if (OpToFold.isFI()) {
+        unsigned Commuted0 = TargetInstrInfo::CommuteAnyOperandIndex;
+        if (TII->findCommutedOpIndices(*UseMI, Commuted0, OpNo)) {
+          MachineOperand *Src0 = getImmOrMaterializedImm(*MRI, UseMI->getOperand(Commuted0));
+          if (Src0->isImm())
+            continue;
+        }
+      }
+
       // Folding the immediate may reveal operations that can be constant
       // folded or replaced with a copy. This can happen for example after
       // frame indices are lowered to constants or from splitting 64-bit
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2070,9 +2070,19 @@
           (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
         return false;
 
-      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
+      if (!Src1->isReg())
        return false;
 
+      if (RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) {
+        MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+        if (Def && Def->isMoveImmediate() &&
+            isInlineConstant(Def->getOperand(1)) &&
+            commuteInstruction(UseMI)) {
+          Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+        } else
+          return false;
+      }
+
       const int64_t Imm = ImmOp->getImm();
 
       // FIXME: This would be a lot easier if we could return a new instruction
@@ -3933,9 +3943,19 @@
           getNamedOperand(*Add, SrcNames[i]);
 
         if (Src->isReg()) {
-          auto Mov = MRI.getUniqueVRegDef(Src->getReg());
-          if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
-            Src = &Mov->getOperand(1);
+          MachineInstr *Def = MRI.getUniqueVRegDef(Src->getReg());
+          if (Def) {
+            if (Def->isMoveImmediate())
+              Src = &Def->getOperand(1);
+            else if (Def->isCopy()) {
+              if (Def->getOperand(1).isReg()) {
+                auto Mov = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+                if (Mov && Mov->isMoveImmediate()) {
+                  Src = &Mov->getOperand(1);
+                }
+              }
+            }
+          }
         }
 
         if (Src) {
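
Note on the first SIFoldOperands hunk: the uses of the copy's destination register are first recorded into CopyUses and only then passed to foldOperand, presumably so that folding (which can rewrite the use list) cannot disturb the ongoing walk. Below is a minimal, self-contained sketch of that snapshot-then-mutate pattern; the Use struct, the std::list, and main are hypothetical stand-ins for illustration only, not the LLVM MachineRegisterInfo API.

#include <cstdio>
#include <list>
#include <vector>

// Hypothetical stand-in for a register use; not an LLVM type.
struct Use {
  int UserId; // which instruction uses the register
  int OpNo;   // which operand slot within that instruction
};

int main() {
  // Uses of the copy's destination register before folding.
  std::list<Use> Uses = {{1, 0}, {2, 1}, {3, 0}};

  // Pass 1: snapshot every candidate while the use list is still stable,
  // mirroring how the hunk records FoldCandidates into CopyUses.
  std::vector<Use> Candidates(Uses.begin(), Uses.end());

  // Pass 2: perform the folds. Mutating `Uses` here cannot invalidate the
  // walk, because we iterate over the snapshot rather than the live list.
  for (const Use &U : Candidates) {
    Uses.remove_if([&](const Use &X) { return X.UserId == U.UserId; });
    std::printf("folded source operand into user %d, operand %d\n",
                U.UserId, U.OpNo);
  }
  return 0;
}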