diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1234,15 +1234,21 @@ } /// Get the base operand and byte offset of an instruction that reads/writes + /// memory. This is a convenience function for callers that are only prepared + /// to handle a single base operand. + bool getMemOperandWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const; + + /// Get the base operands and byte offset of an instruction that reads/writes /// memory. /// It returns false if MI does not read/write memory. - /// It returns false if no base operand and offset was found. - /// It is not guaranteed to always recognize base operand and offsets in all + /// It returns false if no base operands and offset were found. + /// It is not guaranteed to always recognize base operands and offsets in all /// cases. - virtual bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { + virtual bool getMemOperandsWithOffset( + const MachineInstr &MI, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { return false; } @@ -1266,8 +1272,8 @@ /// or /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); /// to TargetPassConfig::createMachineScheduler() to have an effect. 
- virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + virtual bool shouldClusterMemOps(ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads) const { llvm_unreachable("target did not implement shouldClusterMemOps()"); } diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1471,41 +1471,46 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - const MachineOperand *BaseOp; + SmallVector BaseOps; int64_t Offset; - MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) - : SU(su), BaseOp(Op), Offset(ofs) {} - - bool operator<(const MemOpInfo &RHS) const { - if (BaseOp->getType() != RHS.BaseOp->getType()) - return BaseOp->getType() < RHS.BaseOp->getType(); - - if (BaseOp->isReg()) - return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < - std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, - RHS.SU->NodeNum); - if (BaseOp->isFI()) { - const MachineFunction &MF = - *BaseOp->getParent()->getParent()->getParent(); + MemOpInfo(SUnit *SU, ArrayRef BaseOps, + int64_t Offset) + : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {} + + static bool Compare(const MachineOperand *const &A, + const MachineOperand *const &B) { + if (A->getType() != B->getType()) + return A->getType() < B->getType(); + if (A->isReg()) + return A->getReg() < B->getReg(); + if (A->isFI()) { + const MachineFunction &MF = *A->getParent()->getParent()->getParent(); const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - // Can't use tuple comparison here since we might need to use a - // different order when the stack grows down. - if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) - return StackGrowsDown ? 
BaseOp->getIndex() > RHS.BaseOp->getIndex() - : BaseOp->getIndex() < RHS.BaseOp->getIndex(); - - if (Offset != RHS.Offset) - return Offset < RHS.Offset; - - return SU->NodeNum < RHS.SU->NodeNum; + return StackGrowsDown ? A->getIndex() > B->getIndex() + : A->getIndex() < B->getIndex(); } llvm_unreachable("MemOpClusterMutation only supports register or frame " "index bases."); } + + bool operator<(const MemOpInfo &RHS) const { + // FIXME: Don't compare everything twice. Maybe use C++20 three way + // comparison instead when it's available. + if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(), + RHS.BaseOps.begin(), RHS.BaseOps.end(), + Compare)) + return true; + if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(), + BaseOps.begin(), BaseOps.end(), Compare)) + return false; + if (Offset != RHS.Offset) + return Offset < RHS.Offset; + return SU->NodeNum < RHS.SU->NodeNum; + } }; const TargetInstrInfo *TII; @@ -1560,10 +1565,14 @@ ArrayRef MemOps, ScheduleDAGInstrs *DAG) { SmallVector MemOpRecords; for (SUnit *SU : MemOps) { - const MachineOperand *BaseOp; + SmallVector BaseOps; int64_t Offset; - if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); + if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, TRI)) + MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset)); +#ifndef NDEBUG + for (auto *Op : BaseOps) + assert(Op); +#endif } if (MemOpRecords.size() < 2) return; @@ -1573,8 +1582,8 @@ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, - *MemOpRecords[Idx + 1].BaseOp, + if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps, + MemOpRecords[Idx + 1].BaseOps, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" diff 
--git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1030,6 +1030,18 @@ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } +// Default implementation of getMemOperandWithOffset. +bool TargetInstrInfo::getMemOperandWithOffset( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { + SmallVector BaseOps; + if (!getMemOperandsWithOffset(MI, BaseOps, Offset, TRI) || + BaseOps.size() != 1) + return false; + BaseOp = BaseOps.front(); + return true; +} + //===----------------------------------------------------------------------===// // SelectionDAG latency interface. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -112,10 +112,9 @@ /// Hint that pairing the given load or store is unprofitable. 
static void suppressLdStPair(MachineInstr &MI); - bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool getMemOperandsWithOffset( + const MachineInstr &MI, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const override; bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, @@ -132,8 +131,8 @@ static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset); - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + bool shouldClusterMemOps(ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads) const override; void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1978,15 +1978,18 @@ return true; } -bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool AArch64InstrInfo::getMemOperandsWithOffset( + const MachineInstr &LdSt, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; + const MachineOperand *BaseOp; unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + return false; + BaseOps.push_back(BaseOp); + return true; } bool AArch64InstrInfo::getMemOperandWithOffsetWidth( @@ -2363,9 +2366,12 @@ /// Detect opportunities for ldp/stp formation. /// /// Only called for LdSt for which getMemOperandWithOffset returns true. 
-bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, - unsigned NumLoads) const { +bool AArch64InstrInfo::shouldClusterMemOps( + ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads) const { + assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); + const MachineOperand &BaseOp1 = *BaseOps1.front(); + const MachineOperand &BaseOp2 = *BaseOps2.front(); const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); if (BaseOp1.getType() != BaseOp2.getType()) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -179,13 +179,14 @@ int64_t &Offset1, int64_t &Offset2) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const final; + bool + getMemOperandsWithOffset(const MachineInstr &LdSt, + SmallVectorImpl &BaseOps, + int64_t &Offset, + const TargetRegisterInfo *TRI) const final; - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + bool shouldClusterMemOps(ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads) const override; bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -256,14 +256,14 @@ } } -bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool SIInstrInfo::getMemOperandsWithOffset( + const MachineInstr &LdSt, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; unsigned Opc = LdSt.getOpcode(); + 
const MachineOperand *BaseOp; if (isDS(LdSt)) { const MachineOperand *OffsetImm = @@ -276,6 +276,7 @@ if (!BaseOp || !BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); Offset = OffsetImm->getImm(); return true; @@ -312,6 +313,7 @@ if (!BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); Offset = EltSize * Offset0; return true; @@ -337,7 +339,7 @@ const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOp = SOffset; + BaseOps.push_back(SOffset); Offset = OffsetImm->getImm(); return true; } @@ -356,6 +358,7 @@ if (!BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); return true; } @@ -371,6 +374,7 @@ if (!BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); return true; } @@ -378,6 +382,7 @@ const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (VAddr) { // Can't analyze 2 offsets. + // FIXME: Remove this restriction! if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) return false; @@ -390,6 +395,7 @@ Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); if (!BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); return true; } @@ -431,9 +437,12 @@ return Base1 == Base2; } -bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, +bool SIInstrInfo::shouldClusterMemOps(ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads) const { + assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); + const MachineOperand &BaseOp1 = *BaseOps1.front(); + const MachineOperand &BaseOp2 = *BaseOps2.front(); const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -204,10 +204,11 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; /// Get the base register and byte offset of a 
load/store instr. - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool + getMemOperandsWithOffset(const MachineInstr &LdSt, + SmallVectorImpl &BaseOps, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2940,12 +2940,15 @@ } /// Get the base register and byte offset of a load/store instr. -bool HexagonInstrInfo::getMemOperandWithOffset( - const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool HexagonInstrInfo::getMemOperandsWithOffset( + const MachineInstr &LdSt, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { unsigned AccessSize = 0; - BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize); - return BaseOp != nullptr && BaseOp->isReg(); + const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize); + if (!BaseOp || !BaseOp->isReg()) + return false; + BaseOps.push_back(BaseOp); + return true; } /// Can these instructions execute at the same time in a bundle. 
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -67,10 +67,11 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool + getMemOperandsWithOffset(const MachineInstr &LdSt, + SmallVectorImpl &BaseOps, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -795,10 +795,9 @@ return true; } -bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool LanaiInstrInfo::getMemOperandsWithOffset( + const MachineInstr &LdSt, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: return false; @@ -811,7 +810,11 @@ case Lanai::STH_RI: case Lanai::LDBs_RI: case Lanai::LDBz_RI: + const MachineOperand *BaseOp; unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + return false; + BaseOps.push_back(BaseOp); + return true; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -291,10 +291,11 @@ SmallVectorImpl &Cond, bool AllowModify) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) 
const override; + bool + getMemOperandsWithOffset(const MachineInstr &LdSt, + SmallVectorImpl &BaseOps, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3189,9 +3189,9 @@ } } -bool X86InstrInfo::getMemOperandWithOffset( - const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool X86InstrInfo::getMemOperandsWithOffset( + const MachineInstr &MemOp, SmallVectorImpl &BaseOps, + int64_t &Offset, const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) @@ -3199,7 +3199,8 @@ MemRefBegin += X86II::getOperandBias(Desc); - BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); + const MachineOperand *BaseOp = + &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); if (!BaseOp->isReg()) // Can be an MO_FrameIndex return false; @@ -3221,6 +3222,7 @@ if (!BaseOp->isReg()) return false; + BaseOps.push_back(BaseOp); return true; }