diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -44,12 +44,14 @@ class LiveIntervals; class LiveVariables; class MachineLoop; +class MachineBlockFrequencyInfo; class MachineMemOperand; class MachineRegisterInfo; class MCAsmInfo; class MCInst; struct MCSchedModel; class Module; +class ProfileSummaryInfo; class ScheduleDAG; class ScheduleHazardRecognizer; class SDNode; @@ -131,7 +133,10 @@ /// Do not call this method for a non-commutable instruction. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. - virtual MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + virtual MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const; @@ -402,7 +407,9 @@ /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. MachineInstr * - commuteInstruction(MachineInstr &MI, bool NewMI = false, + commuteInstruction(MachineInstr &MI, ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI = false, unsigned OpIdx1 = CommuteAnyOperandIndex, unsigned OpIdx2 = CommuteAnyOperandIndex) const; @@ -1012,6 +1019,8 @@ /// decide on using an opcode (note that those assignments can still change). MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const; @@ -1019,6 +1028,8 @@ /// store from / to any address, not just from a specific stack slot. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr) const; /// Return true when there is potentially a faster code sequence @@ -1103,6 +1114,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const { return nullptr; @@ -1116,6 +1129,7 @@ virtual MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr) const { return nullptr; } @@ -1402,7 +1416,9 @@ virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, - MachineInstr *&DefMI) const { + MachineInstr *&DefMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const { return nullptr; } @@ -1524,7 +1540,9 @@ /// /// The bit (1 << Domain) must be set in the mask returned from /// getExecutionDomain(MI). - virtual void setExecutionDomain(MachineInstr &MI, unsigned Domain) const {} + virtual void setExecutionDomain(MachineInstr &MI, unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const {} /// Returns the preferred minimum clearance /// before an instruction with an unwanted partial register update. diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp --- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp +++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp @@ -114,7 +114,8 @@ // Collapse all the instructions. while (!dv->Instrs.empty()) - TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); + TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain, nullptr, + nullptr); dv->setSingleDomain(domain); // If there are multiple users, give them new, unique DomainValues. @@ -319,7 +320,7 @@ // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { unsigned domain = countTrailingZeros(available); - TII->setExecutionDomain(*mi, domain); + TII->setExecutionDomain(*mi, domain, nullptr, nullptr); visitHardInstr(mi, domain); return; } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -835,8 +835,10 @@ MachineInstrSpan MIS(MI, MI->getParent()); MachineInstr *FoldMI = - LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); + LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, nullptr, nullptr, + &LIS) + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, nullptr, nullptr, + &LIS, &VRM); if (!FoldMI) return false; diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -226,7 +226,8 @@ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) return false; - MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); + MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, + nullptr, nullptr, &LIS); if (!FoldMI) return false; LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -538,7 +538,8 @@ // Commute commutable instructions. bool Commuted = false; if (!FoundCSE && MI->isCommutable()) { - if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) { + if (MachineInstr *NewMI = TII->commuteInstruction(*MI, nullptr, + nullptr)) { Commuted = true; FoundCSE = VNT.count(NewMI); if (NewMI != MI) { @@ -547,7 +548,7 @@ Changed = true; } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! - (void)TII->commuteInstruction(*MI); + (void)TII->commuteInstruction(*MI, nullptr, nullptr); } } diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1580,8 +1580,8 @@ auto CP = RI.getCommutePair(); if (CP) { Changed = true; - TII->commuteInstruction(*(RI.getMI()), false, (*CP).first, - (*CP).second); + TII->commuteInstruction(*(RI.getMI()), nullptr, nullptr, false, + (*CP).first, (*CP).second); LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); } } @@ -1767,7 +1767,8 @@ unsigned FoldedReg = FoldAsLoadDefReg; MachineInstr *DefMI = nullptr; if (MachineInstr *FoldMI = - TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { + TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI, + nullptr, nullptr)) { // Update LocalMIs since we replaced MI with FoldMI and deleted // DefMI. LLVM_DEBUG(dbgs() << "Replacing: " << *MI); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -832,7 +832,8 @@ // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); MachineInstr *NewMI = - TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); + TII->commuteInstruction(*DefMI, nullptr, nullptr, false, UseOpIdx, + NewDstIdx); if (!NewMI) return { false, false }; if (Register::isVirtualRegister(IntA.reg) && diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -154,6 +154,8 @@ } MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, bool NewMI, unsigned Idx1, unsigned Idx2) const { const MCInstrDesc &MCID = MI.getDesc(); @@ -236,7 +238,10 @@ return CommutedMI; } -MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI, +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose @@ -248,7 +253,7 @@ "Precondition violation: MI must be commutable."); return nullptr; } - return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return commuteInstructionImpl(MI, PSI, MBFI, NewMI, OpIdx1, OpIdx2); } bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, @@ -530,6 +535,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS, VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; @@ -577,7 +584,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, PSI, MBFI, LIS, VRM); } if (NewMI) { @@ -619,6 +626,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS) const { assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG @@ -643,7 +652,7 @@ NewMI = &*MBB.insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, PSI, MBFI, LIS); } if (!NewMI) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -684,7 +684,8 @@ unsigned Dist) { Register RegC = MI->getOperand(RegCIdx).getReg(); LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); - MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); + MachineInstr *NewMI = TII->commuteInstruction(*MI, nullptr, nullptr, false, + RegBIdx, RegCIdx); if (NewMI == nullptr) { LLVM_DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -162,6 +162,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3177,6 +3177,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. Consider this instruction: // diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -515,7 +515,7 @@ auto *BB = OrigMI.getParent(); auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI); BB->insert(OrigMI, NewMI); - if (TII->commuteInstruction(*NewMI)) { + if (TII->commuteInstruction(*NewMI, nullptr, nullptr)) { LLVM_DEBUG(dbgs() << " commuted: " << *NewMI); if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ)) { diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -276,7 +276,7 @@ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); if (Fold.isCommuted()) - TII.commuteInstruction(*Inst32, false); + TII.commuteInstruction(*Inst32, nullptr, nullptr, false); return true; } @@ -393,7 +393,8 @@ return false; if (!CanCommute || - !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) + !TII->commuteInstruction(*MI, nullptr, nullptr, false, CommuteIdx0, + CommuteIdx1)) return false; if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) { @@ -421,7 +422,8 @@ return true; } - TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1); + TII->commuteInstruction(*MI, nullptr, nullptr, false, CommuteIdx0, + CommuteIdx1); return false; } @@ -1230,7 +1232,7 @@ tryFoldInst(TII, Fold.UseMI); } else if (Fold.isCommuted()) { // Restoring instruction's original operand order if fold has failed. - TII->commuteInstruction(*Fold.UseMI, false); + TII->commuteInstruction(*Fold.UseMI, nullptr, nullptr, false); } } } diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -371,7 +371,7 @@ MachineOperand &Op1 = A->getOperand(1); MachineOperand &Op2 = A->getOperand(2); if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) { - TII->commuteInstruction(*A); + TII->commuteInstruction(*A, nullptr, nullptr); Changed = true; } if (Op1.getReg() != ExecReg) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -137,7 +137,10 @@ MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const; - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override; @@ -1032,6 +1035,8 @@ ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; }; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1664,7 +1664,10 @@ return &MI; } -MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, +MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned Src0Idx, unsigned Src1Idx) const { assert(!NewMI && "this should never be used"); @@ -1688,7 +1691,8 @@ if (isOperandLegal(MI, Src1Idx, &Src0)) { // Be sure to copy the source modifiers to the right place. CommutedMI - = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); + = TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, NewMI, + Src0Idx, Src1Idx); } } else if (Src0.isReg() && !Src1.isReg()) { @@ -2454,7 +2458,7 @@ if (Def && Def->isMoveImmediate() && isInlineConstant(Def->getOperand(1)) && MRI->hasOneUse(Src1->getReg()) && - commuteInstruction(UseMI)) { + commuteInstruction(UseMI, nullptr, nullptr)) { Src0->ChangeToImmediate(Def->getOperand(1).getImm()); } else if ((Register::isPhysicalRegister(Src1->getReg()) && RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || @@ -6563,8 +6567,9 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, - VirtRegMap *VRM) const { + MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack (copied from AArch64). Consider this instruction: // // %0:sreg_32 = COPY $m0 diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -113,12 +113,12 @@ // We have failed to fold src0, so commute the instruction and try again. if (TryToCommute && MI.isCommutable()) { - if (TII->commuteInstruction(MI)) { + if (TII->commuteInstruction(MI, nullptr, nullptr)) { if (foldImmediates(MI, TII, MRI, false)) return true; // Commute back. - TII->commuteInstruction(MI); + TII->commuteInstruction(MI, nullptr, nullptr); } } @@ -183,7 +183,7 @@ // cmpk instructions do scc = dst imm16, so commute the instruction to // get constants on the RHS. if (!MI.getOperand(0).isReg()) - TII->commuteInstruction(MI, false, 0, 1); + TII->commuteInstruction(MI, nullptr, nullptr, false, 0, 1); const MachineOperand &Src1 = MI.getOperand(1); if (!Src1.isImm()) @@ -355,7 +355,7 @@ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) && SrcImm == Src0) { - if (!TII->commuteInstruction(MI, false, 1, 2)) + if (!TII->commuteInstruction(MI, nullptr, nullptr, false, 1, 2)) NewImm = 0; } @@ -636,7 +636,7 @@ MachineOperand *Src1 = &MI.getOperand(2); if (!Src0->isReg() && Src1->isReg()) { - if (TII->commuteInstruction(MI, false, 1, 2)) + if (TII->commuteInstruction(MI, nullptr, nullptr, false, 1, 2)) std::swap(Src0, Src1); } @@ -706,7 +706,8 @@ if (!TII->canShrink(MI, MRI)) { // Try commuting the instruction and see if that enables us to shrink // it. - if (!MI.isCommutable() || !TII->commuteInstruction(MI) || + if (!MI.isCommutable() || + !TII->commuteInstruction(MI, nullptr, nullptr) || !TII->canShrink(MI, MRI)) continue; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -96,7 +96,10 @@ /// non-commutable pair of operand indices OpIdx1 and OpIdx2. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; @@ -326,7 +329,9 @@ /// VFP/NEON execution domains. std::pair getExecutionDomain(const MachineInstr &MI) const override; - void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override; + void setExecutionDomain(MachineInstr &MI, unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const override; unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2146,6 +2146,8 @@ } MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { @@ -2159,7 +2161,8 @@ if (CC == ARMCC::AL || PredReg != ARM::CPSR) return nullptr; MachineInstr *CommutedMI = - TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, NewMI, OpIdx1, + OpIdx2); if (!CommutedMI) return nullptr; // After swapping the MOVCC operands, also invert the condition. @@ -2168,7 +2171,8 @@ return CommutedMI; } } - return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, NewMI, OpIdx1, + OpIdx2); } /// Identify instructions that can be folded into a MOVCC instruction, and @@ -4926,7 +4930,9 @@ } void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, - unsigned Domain) const { + unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -758,7 +758,7 @@ if (Reg1 != Reg0) return false; // Try to commute the operands to make it a 2-address instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(*MI); + MachineInstr *CommutedMI = TII->commuteInstruction(*MI, nullptr, nullptr); if (!CommutedMI) return false; } @@ -770,7 +770,8 @@ MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; MachineInstr *CommutedMI = - TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2); + TII->commuteInstruction(*MI, nullptr, nullptr, false, + CommOpIdx1, CommOpIdx2); if (!CommutedMI) return false; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -171,7 +171,10 @@ /// /// For example, we can commute rlwimi instructions, but only if the /// rotate amt is zero. We also have to munge the immediates a bit. - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -365,14 +365,18 @@ return 0; } -MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, +MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { MachineFunction &MF = *MI.getParent()->getParent(); // Normal instructions can be commuted the obvious way. if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMIo) - return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, NewMI, OpIdx1, + OpIdx2); // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because // changing the relative order of the mask operands might change what happens diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -194,7 +194,10 @@ /// non-commutable operands. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override; @@ -261,11 +264,15 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr) const override; bool expandPostRAPseudo(MachineInstr &MBBI) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -270,6 +270,8 @@ } MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { @@ -293,11 +295,13 @@ unsigned CCValid = WorkingMI.getOperand(3).getImm(); unsigned CCMask = WorkingMI.getOperand(4).getImm(); WorkingMI.getOperand(4).setImm(CCMask ^ CCValid); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } default: - return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, + NewMI, OpIdx1, OpIdx2); } } @@ -655,7 +659,7 @@ } if (CommuteIdx != -1) - if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx)) + if (!commuteInstruction(UseMI, nullptr, nullptr, false, CommuteIdx, UseIdx)) return false; bool DeleteDef = MRI->hasOneNonDBGUse(Reg); @@ -997,6 +1001,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -1210,6 +1215,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS) const { return nullptr; } diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -137,7 +137,7 @@ // If the destination (now) matches one source, prefer this to be first. if (DestReg != Src1Reg && DestReg == Src2Reg) { - TII->commuteInstruction(*MBBI, false, 1, 2); + TII->commuteInstruction(*MBBI, nullptr, nullptr, false, 1, 2); std::swap(Src1Reg, Src2Reg); std::swap(Src1IsHigh, Src2IsHigh); } @@ -269,4 +269,3 @@ return Modified; } - diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -185,7 +185,7 @@ return true; } if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { - TII->commuteInstruction(MI, false, 1, 2); + TII->commuteInstruction(MI, nullptr, nullptr, false, 1, 2); MI.setDesc(TII->get(Opcode)); MI.tieOperands(0, 1); return true; @@ -338,7 +338,7 @@ if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) && (!MI.isCommutable() || MI.getOperand(0).getReg() != MI.getOperand(2).getReg() || - !TII->commuteInstruction(MI, false, 1, 2))) + !TII->commuteInstruction(MI, nullptr, nullptr, false, 1, 2))) break; MI.setDesc(TII->get(TwoOperandOpcode)); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -48,7 +48,10 @@ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -85,7 +85,9 @@ } MachineInstr *WebAssemblyInstrInfo::commuteInstructionImpl( - MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { + MachineInstr &MI, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { // If the operands are stackified, we can't reorder them. WebAssemblyFunctionInfo &MFI = *MI.getParent()->getParent()->getInfo(); @@ -94,7 +96,8 @@ return nullptr; // Otherwise use the default implementation. - return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, NewMI, + OpIdx1, OpIdx2); } // Branch analysis. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -740,7 +740,8 @@ assert(!Declined && "Don't decline commuting until you've finished trying it"); // Commuting didn't help. Revert it. - TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TII->commuteInstruction(*Insert, nullptr, nullptr, /*NewMI=*/false, + Operand0, Operand1); TentativelyCommuting = false; Declined = true; } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) { @@ -748,7 +749,8 @@ Operand1 = TargetInstrInfo::CommuteAnyOperandIndex; if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) { // Tentatively commute the operands and try again. - TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TII->commuteInstruction(*Insert, nullptr, nullptr, /*NewMI=*/false, + Operand0, Operand1); TreeWalker.resetTopOperands(Insert); TentativelyCommuting = true; Declined = false; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3941,7 +3941,7 @@ MachineInstr *Result = XII.foldMemoryOperandImpl( *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment, - /*AllowCommute=*/true); + /*AllowCommute=*/true, nullptr, nullptr); if (!Result) return false; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -24,7 +24,9 @@ #include "X86GenInstrInfo.inc" namespace llvm { +class MachineBlockFrequencyInfo; class MachineInstrBuilder; +class ProfileSummaryInfo; class X86RegisterInfo; class X86Subtarget; @@ -341,6 +343,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; @@ -350,6 +354,7 @@ MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS = nullptr) const override; /// unfoldMemoryOperand - Separate a single instruction which folded a load or @@ -427,9 +432,13 @@ uint16_t getExecutionDomainCustom(const MachineInstr &MI) const; - void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override; + void setExecutionDomain(MachineInstr &MI, unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const override; - bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const; + bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const; unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, @@ -444,7 +453,9 @@ ArrayRef MOs, MachineBasicBlock::iterator InsertPt, unsigned Size, unsigned Alignment, - bool AllowCommute) const; + bool AllowCommute, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const; bool isHighLatencyDef(int opc) const override; @@ -490,7 +501,9 @@ MachineInstr *optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, - MachineInstr *&DefMI) const override; + MachineInstr *&DefMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const override; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; @@ -537,7 +550,10 @@ /// non-commutable operands. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -19,14 +19,17 @@ #include "X86TargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -1524,7 +1527,10 @@ #undef VPERM_CASES } -MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, +MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { @@ -1555,7 +1561,8 @@ auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); WorkingMI.getOperand(3).setImm(Size - Amt); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::PFSUBrr: @@ -1566,15 +1573,20 @@ (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr); auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::BLENDPDrri: case X86::BLENDPSrri: case X86::VBLENDPDrri: - case X86::VBLENDPSrri: + case X86::VBLENDPSrri: { // If we're optimizing for size, try to use MOVSD/MOVSS. - if (MI.getParent()->getParent()->getFunction().hasOptSize()) { + auto *MBB = MI.getParent(); + auto MF = MBB->getParent(); + bool OptForSize = MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + if (OptForSize) { unsigned Mask, Opc; switch (MI.getOpcode()) { default: llvm_unreachable("Unreachable!"); @@ -1588,11 +1600,13 @@ WorkingMI.setDesc(get(Opc)); WorkingMI.RemoveOperand(3); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, + PSI, MBFI, /*NewMI=*/false, OpIdx1, OpIdx2); } } LLVM_FALLTHROUGH; + } case X86::PBLENDWrri: case X86::VBLENDPDYrri: case X86::VBLENDPSYrri: @@ -1621,7 +1635,8 @@ int8_t Imm = MI.getOperand(3).getImm() & Mask; auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm(Mask ^ Imm); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::INSERTPSrr: @@ -1641,7 +1656,8 @@ unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask; auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } return nullptr; @@ -1664,7 +1680,8 @@ auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); WorkingMI.addOperand(MachineOperand::CreateImm(Mask)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -1675,7 +1692,8 @@ auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(X86::SHUFPDrri)); WorkingMI.addOperand(MachineOperand::CreateImm(0x02)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::SHUFPDrri: { @@ -1684,7 +1702,8 @@ auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(X86::MOVSDrr)); WorkingMI.RemoveOperand(3); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::PCLMULQDQrr: @@ -1700,7 +1719,8 @@ unsigned Src2Hi = Imm & 0x10; auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: @@ -1732,7 +1752,8 @@ Imm = X86::getSwappedVPCMPImm(Imm); auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::VPCOMBri: case X86::VPCOMUBri: @@ -1744,7 +1765,8 @@ Imm = X86::getSwappedVPCOMImm(Imm); auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm(Imm); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::VCMPSDZrr: @@ -1765,7 +1787,8 @@ Imm = X86::getSwappedVCMPImm(Imm); auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::VPERM2F128rr: @@ -1776,7 +1799,8 @@ int8_t Imm = MI.getOperand(3).getImm() & 0xFF; auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm(Imm ^ 0x22); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::MOVHLPSrr: @@ -1799,7 +1823,8 @@ } auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: { @@ -1807,7 +1832,8 @@ unsigned OpNo = MI.getDesc().getNumOperands() - 1; X86::CondCode CC = static_cast(MI.getOperand(OpNo).getImm()); WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi: @@ -1842,7 +1868,8 @@ case X86::VPTERNLOGQZrmbikz: { auto &WorkingMI = cloneIfNew(MI); commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } default: { @@ -1850,7 +1877,8 @@ unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode()); auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -1861,11 +1889,13 @@ getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group); auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, PSI, MBFI, + /*NewMI=*/false, OpIdx1, OpIdx2); } - return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + return TargetInstrInfo::commuteInstructionImpl(MI, PSI, MBFI, + NewMI, OpIdx1, OpIdx2); } } } @@ -3833,7 +3863,9 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, - MachineInstr *&DefMI) const { + MachineInstr *&DefMI, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const { // Check whether we can move DefMI here. DefMI = MRI->getVRegDef(FoldAsLoadDefReg); assert(DefMI); @@ -3859,7 +3891,8 @@ return nullptr; // Check whether we can fold the def into SrcOperandId. - if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) { + if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI, + PSI, MBFI)) { FoldAsLoadDefReg = 0; return FoldMI; } @@ -4820,7 +4853,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, unsigned OpNum, ArrayRef MOs, MachineBasicBlock::iterator InsertPt, - unsigned Size, unsigned Align, bool AllowCommute) const { + unsigned Size, unsigned Align, bool AllowCommute, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI) const { bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); bool isTwoAddrFold = false; @@ -4953,7 +4987,7 @@ return nullptr; MachineInstr *CommutedMI = - commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + commuteInstruction(MI, PSI, MBFI, false, CommuteOpIdx1, CommuteOpIdx2); if (!CommutedMI) { // Unable to commute. return nullptr; @@ -4966,13 +5000,14 @@ // Attempt to fold with the commuted version of the instruction. NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, - Size, Align, /*AllowCommute=*/false); + Size, Align, /*AllowCommute=*/false, + PSI, MBFI); if (NewMI) return NewMI; // Folding failed again - undo the commute before returning. MachineInstr *UncommutedMI = - commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + commuteInstruction(MI, PSI, MBFI, false, CommuteOpIdx1, CommuteOpIdx2); if (!UncommutedMI) { // Unable to commute. return nullptr; @@ -4998,7 +5033,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS, + int FrameIndex, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + LiveIntervals *LIS, VirtRegMap *VRM) const { // Check switch flag if (NoFusing) @@ -5048,7 +5086,8 @@ return foldMemoryOperandImpl(MF, MI, Ops[0], MachineOperand::CreateFI(FrameIndex), InsertPt, - Size, Alignment, /*AllowCommute=*/true); + Size, Alignment, /*AllowCommute=*/true, + PSI, MBFI); } /// Check if \p LoadMI is a partial register load that we can't fold into \p MI @@ -5189,6 +5228,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, LiveIntervals *LIS) const { // TODO: Support the case where LoadMI loads a wide register, but MI @@ -5204,7 +5244,8 @@ if (isLoadFromStackSlot(LoadMI, FrameIndex)) { if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF)) return nullptr; - return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS); + return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, PSI, MBFI, + LIS); } // Check switch flag @@ -5356,7 +5397,8 @@ } } return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, - /*Size=*/0, Alignment, /*AllowCommute=*/true); + /*Size=*/0, Alignment, /*AllowCommute=*/true, + PSI, MBFI); } static SmallVector @@ -6683,7 +6725,9 @@ } bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI, - unsigned Domain) const { + unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const { assert(Domain > 0 && Domain < 4 && "Invalid execution domain"); uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); @@ -6792,7 +6836,7 @@ MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0) { - commuteInstruction(MI, false); + commuteInstruction(MI, PSI, MBFI, false); return true; } // We must always return true for MOVHLPSrr. @@ -6855,13 +6899,15 @@ return std::make_pair(domain, validDomains); } -void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { +void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); // Attempt to match for custom instructions. - if (setExecutionDomainCustom(MI, Domain)) + if (setExecutionDomainCustom(MI, Domain, PSI, MBFI)) return; const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);