Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -1136,11 +1136,11 @@ return false; } - /// Get the base register and byte offset of an instruction that reads/writes + /// Get the base operand and byte offset of an instruction that reads/writes /// memory. - virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { + virtual bool getMemOpBaseImmOfs(MachineInstr &MI, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { return false; } @@ -1164,8 +1164,8 @@ /// or /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); /// to TargetPassConfig::createMachineScheduler() to have an effect. - virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, - MachineInstr &SecondLdSt, unsigned BaseReg2, + virtual bool shouldClusterMemOps(MachineOperand &BaseOp1, + MachineOperand &BaseOp2, unsigned NumLoads) const { llvm_unreachable("target did not implement shouldClusterMemOps()"); } Index: lib/CodeGen/ImplicitNullChecks.cpp =================================================================== --- lib/CodeGen/ImplicitNullChecks.cpp +++ lib/CodeGen/ImplicitNullChecks.cpp @@ -360,10 +360,10 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, ArrayRef PrevInsts) { int64_t Offset; - unsigned BaseReg; + MachineOperand *BaseOp; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) || - BaseReg != PointerReg) + if (!TII->getMemOpBaseImmOfs(MI, BaseOp, Offset, TRI) || !BaseOp->isReg() || + BaseOp->getReg() != PointerReg) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, Index: lib/CodeGen/MachinePipeliner.cpp =================================================================== --- lib/CodeGen/MachinePipeliner.cpp +++ 
lib/CodeGen/MachinePipeliner.cpp @@ -1121,11 +1121,13 @@ // First, perform the cheaper check that compares the base register. // If they are the same and the load offset is less than the store // offset, then mark the dependence as loop carried potentially. - unsigned BaseReg1, BaseReg2; + MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && - TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { - if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + if (TII->getMemOpBaseImmOfs(LdMI, BaseOp1, Offset1, TRI) && + TII->getMemOpBaseImmOfs(MI, BaseOp2, Offset2, TRI)) { + if (BaseOp1->isReg() && BaseOp2->isReg() && + BaseOp1->getReg() == BaseOp2->getReg() && + (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && "What happened to the chain edge?"); SDep Dep(Load, SDep::Barrier); @@ -3246,11 +3248,16 @@ /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (!TII->getMemOpBaseImmOfs(MI, BaseOp, Offset, TRI)) return false; + if (!BaseOp->isReg()) + return false; + + unsigned BaseReg = BaseOp->getReg(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. 
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); @@ -3653,19 +3660,20 @@ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) return true; - unsigned BaseRegS, BaseRegD; + MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) || - !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI)) + if (!TII->getMemOpBaseImmOfs(*SI, BaseOpS, OffsetS, TRI) || + !TII->getMemOpBaseImmOfs(*DI, BaseOpD, OffsetD, TRI)) return true; - if (BaseRegS != BaseRegD) + if (!BaseOpS->isReg() || !BaseOpD->isReg() || + BaseOpS->getReg() != BaseOpD->getReg()) return true; // Check that the base register is incremented by a constant value for each // iteration. - MachineInstr *Def = MRI.getVRegDef(BaseRegS); + MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); if (!Def || !Def->isPHI()) return true; unsigned InitVal = 0; Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -1483,15 +1483,15 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - MemOpInfo(SUnit *su, unsigned reg, int64_t ofs) - : SU(su), BaseReg(reg), Offset(ofs) {} + MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs) + : SU(su), BaseOp(Op), Offset(ofs) {} - bool operator<(const MemOpInfo&RHS) const { - return std::tie(BaseReg, Offset, SU->NodeNum) < - std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum); + bool operator<(const MemOpInfo &RHS) const { + return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < + std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, RHS.SU->NodeNum); } }; @@ -1547,10 +1547,12 @@ ArrayRef MemOps, ScheduleDAGMI *DAG) { SmallVector MemOpRecords; for (SUnit *SU : MemOps) { - unsigned BaseReg; + MachineOperand 
*BaseOp; int64_t Offset; - if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset)); + // MemOpInfo::operator< calls getReg(), so only record register bases. + if (TII->getMemOpBaseImmOfs(*SU->getInstr(), BaseOp, Offset, TRI) && + BaseOp->isReg()) + MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); } if (MemOpRecords.size() < 2) return; @@ -1560,8 +1562,8 @@ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg, - *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg, + if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, + *MemOpRecords[Idx + 1].BaseOp, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" Index: lib/CodeGen/MachineSink.cpp =================================================================== --- lib/CodeGen/MachineSink.cpp +++ lib/CodeGen/MachineSink.cpp @@ -716,9 +716,12 @@ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (!TII->getMemOpBaseImmOfs(MI, BaseOp, Offset, TRI)) + return false; + + if (!BaseOp->isReg()) return false; if (!(MI.mayLoad() && !MI.isPredicable())) @@ -731,7 +734,7 @@ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && - MBP.LHS.getReg() == BaseReg; + MBP.LHS.getReg() == BaseOp->getReg(); } /// Sink an instruction and its associated debug instructions. 
If the debug Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -110,13 +110,13 @@ /// Hint that pairing the given load or store is unprofitable. static void suppressLdStPair(MachineInstr &MI); - bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool getMemOpBaseImmOfs(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; - bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemOpBaseImmOfsWidth(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; /// Return the immediate offset of the base register in a load/store \p LdSt. 
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const; @@ -128,8 +128,7 @@ bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset) const; - bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, - MachineInstr &SecondLdSt, unsigned BaseReg2, + bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2, unsigned NumLoads) const override; void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1135,7 +1135,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned BaseRegA = 0, BaseRegB = 0; + MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 
int64_t OffsetA = 0, OffsetB = 0; unsigned WidthA = 0, WidthB = 0; @@ -1146,14 +1146,15 @@ MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; - // Retrieve the base register, offset from the base register and width. Width + // Retrieve the base register, offset from the base and width. Width // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If // base registers are identical, and the offset of a lower memory access + // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. - if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && - getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { - if (BaseRegA == BaseRegB) { + if (getMemOpBaseImmOfsWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemOpBaseImmOfsWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if ((BaseOpA->isReg() && BaseOpB->isReg() && + BaseOpA->getReg() == BaseOpB->getReg())) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; @@ -2168,10 +2169,13 @@ // Can't merge/pair if the instruction modifies the base register. // e.g., ldr x0, [x0] - unsigned BaseReg = MI.getOperand(1).getReg(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (MI.modifiesRegister(BaseReg, TRI)) - return false; + // This case will never occur with an FI base. + if (MI.getOperand(1).isReg()) { + unsigned BaseReg = MI.getOperand(1).getReg(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (MI.modifiesRegister(BaseReg, TRI)) + return false; + } // Check if this load/store has a hint to avoid pair formation. // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 
@@ -2194,16 +2198,17 @@ return true; } -bool AArch64InstrInfo::getMemOpBaseRegImmOfs( - MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool AArch64InstrInfo::getMemOpBaseImmOfs(MachineInstr &LdSt, + MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { unsigned Width; - return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI); + return getMemOpBaseImmOfsWidth(LdSt, BaseOp, Offset, Width, TRI); } -bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( - MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const { +bool AArch64InstrInfo::getMemOpBaseImmOfsWidth( + MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, + unsigned &Width, const TargetRegisterInfo *TRI) const { assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() == 3) { @@ -2231,11 +2236,11 @@ // multiplied by the scaling factor. Unscaled instructions have scaling factor // set to 1. if (LdSt.getNumExplicitOperands() == 3) { - BaseReg = LdSt.getOperand(1).getReg(); + BaseOp = &LdSt.getOperand(1); Offset = LdSt.getOperand(2).getImm() * Scale; } else { assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); - BaseReg = LdSt.getOperand(2).getReg(); + BaseOp = &LdSt.getOperand(2); Offset = LdSt.getOperand(3).getImm() * Scale; } return true; @@ -2448,13 +2453,19 @@ /// Detect opportunities for ldp/stp formation. /// -/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true. -bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, - unsigned BaseReg1, - MachineInstr &SecondLdSt, - unsigned BaseReg2, +/// Only called for LdSt for which getMemOpBaseImmOfs returns true. 
+bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1, + MachineOperand &BaseOp2, unsigned NumLoads) const { - if (BaseReg1 != BaseReg2) + MachineInstr &FirstLdSt = *BaseOp1.getParent(); + MachineInstr &SecondLdSt = *BaseOp2.getParent(); + if (BaseOp1.getType() != BaseOp2.getType()) + return false; + + assert(BaseOp1.isReg() && "Only base registers are supported."); + + // Check for base regs. + if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) return false; // Only cluster up to a single pair. @@ -5528,19 +5539,20 @@ // At this point, we have a stack instruction that we might need to fix // up. We'll handle it if it's a load or store. if (MI.mayLoadOrStore()) { - unsigned Base; // Filled with the base regiser of MI. + MachineOperand *Base; // Filled with the base operand of MI. int64_t Offset; // Filled with the offset of MI. - unsigned DummyWidth; - // Does it allow us to offset the base register and is the base SP? - if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) || - Base != AArch64::SP) + // Does it allow us to offset the base operand and is the base the + // register SP? + if (!getMemOpBaseImmOfs(MI, Base, Offset, &RI) || + !Base->isReg() || Base->getReg() != AArch64::SP) return outliner::InstrType::Illegal; // Find the minimum/maximum offset for this instruction and check if // fixing it up would be in range. int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction. unsigned Scale; // The scale to multiply the offsets by. + unsigned DummyWidth; getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); // TODO: We should really test what happens if an instruction overflows. @@ -5565,13 +5577,14 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { for (MachineInstr &MI : MBB) { - unsigned Base, Width; + MachineOperand *Base; + unsigned Width; int64_t Offset; // Is this a load or store with an immediate offset with SP as the base? 
if (!MI.mayLoadOrStore() || - !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) || - Base != AArch64::SP) + !getMemOpBaseImmOfsWidth(MI, Base, Offset, Width, &RI) || + !Base->isReg() || Base->getReg() != AArch64::SP) continue; // It is, so we have to fix it up. Index: lib/Target/AArch64/AArch64StorePairSuppress.cpp =================================================================== --- lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -148,9 +148,10 @@ for (auto &MI : MBB) { if (!isNarrowFPStore(MI)) continue; - unsigned BaseReg; + MachineOperand *BaseOp; int64_t Offset; - if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) { + if (TII->getMemOpBaseImmOfs(MI, BaseOp, Offset, TRI) && BaseOp->isReg()) { + unsigned BaseReg = BaseOp->getReg(); if (PrevBaseReg == BaseReg) { // If this block can take STPs, skip ahead to the next block. if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -164,12 +164,11 @@ int64_t &Offset1, int64_t &Offset2) const override; - bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const final; + bool getMemOpBaseImmOfs(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const final; - bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, - MachineInstr &SecondLdSt, unsigned BaseReg2, + bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2, unsigned NumLoads) const override; bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -265,9 +265,9 @@ 
} } -bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool SIInstrInfo::getMemOpBaseImmOfs(MachineInstr &LdSt, + MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { unsigned Opc = LdSt.getOpcode(); if (isDS(LdSt)) { @@ -275,10 +275,9 @@ getNamedOperand(LdSt, AMDGPU::OpName::offset); if (OffsetImm) { // Normal, single offset LDS instruction. - const MachineOperand *AddrReg = - getNamedOperand(LdSt, AMDGPU::OpName::addr); + MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); - BaseReg = AddrReg->getReg(); + BaseOp = AddrReg; Offset = OffsetImm->getImm(); return true; } @@ -310,9 +309,8 @@ if (isStride64(Opc)) EltSize *= 64; - const MachineOperand *AddrReg = - getNamedOperand(LdSt, AMDGPU::OpName::addr); - BaseReg = AddrReg->getReg(); + MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); + BaseOp = AddrReg; Offset = EltSize * Offset0; return true; } @@ -325,14 +323,13 @@ if (SOffset && SOffset->isReg()) return false; - const MachineOperand *AddrReg = - getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) return false; const MachineOperand *OffsetImm = getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseReg = AddrReg->getReg(); + BaseOp = AddrReg; Offset = OffsetImm->getImm(); if (SOffset) // soffset can be an inline immediate. 
@@ -347,24 +344,23 @@ if (!OffsetImm) return false; - const MachineOperand *SBaseReg = - getNamedOperand(LdSt, AMDGPU::OpName::sbase); - BaseReg = SBaseReg->getReg(); + MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase); + BaseOp = SBaseReg; Offset = OffsetImm->getImm(); return true; } if (isFLAT(LdSt)) { - const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (VAddr) { // Can't analyze 2 offsets. if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) return false; - BaseReg = VAddr->getReg(); + BaseOp = VAddr; } else { // scratch instructions have either vaddr or saddr. - BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg(); + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); } Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); @@ -402,11 +398,19 @@ return Base1 == Base2; } -bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, - unsigned BaseReg1, - MachineInstr &SecondLdSt, - unsigned BaseReg2, +bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1, + MachineOperand &BaseOp2, unsigned NumLoads) const { + // Support only base operands with base registers. + // Note: this could be extended to support FI operands. 
+ if (BaseOp1.getType() != BaseOp2.getType() || !BaseOp1.isReg()) + return false; + + MachineInstr &FirstLdSt = *BaseOp1.getParent(); + MachineInstr &SecondLdSt = *BaseOp2.getParent(); + unsigned BaseReg1 = BaseOp1.getReg(); + unsigned BaseReg2 = BaseOp2.getReg(); + if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2)) return false; @@ -2160,11 +2164,13 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const { - unsigned BaseReg0, BaseReg1; + MachineOperand *BaseOp0, *BaseOp1; int64_t Offset0, Offset1; - if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && - getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { + if (getMemOpBaseImmOfs(MIa, BaseOp0, Offset0, &RI) && + getMemOpBaseImmOfs(MIb, BaseOp1, Offset1, &RI)) { + if (!BaseOp0->isReg() || !BaseOp1->isReg()) + return false; if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { // FIXME: Handle ds_read2 / ds_write2. @@ -2172,7 +2178,7 @@ } unsigned Width0 = (*MIa.memoperands_begin())->getSize(); unsigned Width1 = (*MIb.memoperands_begin())->getSize(); - if (BaseReg0 == BaseReg1 && + if (BaseOp0->getReg() == BaseOp1->getReg() && offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { return true; } Index: lib/Target/AMDGPU/SIMachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.cpp +++ lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1955,12 +1955,11 @@ for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; - unsigned BaseLatReg; + MachineOperand *BaseLatOp; int64_t OffLatReg; if (SITII->isLowLatencyInstruction(*SU->getInstr())) { IsLowLatencySU[i] = 1; - if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg, - TRI)) + if (SITII->getMemOpBaseImmOfs(*SU->getInstr(), BaseLatOp, OffLatReg, TRI)) LowLatencyOffset[i] = OffLatReg; } else if (SITII->isHighLatencyInstruction(*SU->getInstr())) IsHighLatencySU[i] = 1; Index: 
lib/Target/Hexagon/HexagonInstrInfo.h =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.h +++ lib/Target/Hexagon/HexagonInstrInfo.h @@ -216,9 +216,9 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; /// Get the base register and byte offset of a load/store instr. - bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool getMemOpBaseImmOfs(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. @@ -436,8 +436,8 @@ bool predOpcodeHasNot(ArrayRef Cond) const; unsigned getAddrMode(const MachineInstr &MI) const; - unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, - unsigned &AccessSize) const; + MachineOperand *getBaseAndOffset(const MachineInstr &MI, int &Offset, + unsigned &AccessSize) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup Index: lib/Target/Hexagon/HexagonInstrInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.cpp +++ lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2894,14 +2894,15 @@ } /// Get the base register and byte offset of a load/store instr. 
-bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, - unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) - const { +bool HexagonInstrInfo::getMemOpBaseImmOfs(MachineInstr &LdSt, + MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { unsigned AccessSize = 0; int OffsetVal = 0; - BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize); + BaseOp = getBaseAndOffset(LdSt, OffsetVal, AccessSize); Offset = OffsetVal; - return BaseReg != 0; + return BaseOp != nullptr; } /// Can these instructions execute at the same time in a bundle. @@ -3108,21 +3109,22 @@ -// Returns the base register in a memory access (load/store). The offset is +// Returns the base operand in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. -// If the base register has a subregister or the offset field does not contain -// an immediate value, return 0. -unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, - int &Offset, unsigned &AccessSize) const { +// If the base operand has a subregister or the offset field does not contain +// an immediate value, return nullptr. +MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, + int &Offset, + unsigned &AccessSize) const { // Return if it is not a base+offset type instruction or a MemOp. if (getAddrMode(MI) != HexagonII::BaseImmOffset && getAddrMode(MI) != HexagonII::BaseLongOffset && !isMemOp(MI) && !isPostIncrement(MI)) - return 0; + return nullptr; AccessSize = getMemAccessSize(MI); unsigned BasePos = 0, OffsetPos = 0; if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos)) - return 0; + return nullptr; // Post increment updates its EA after the mem access, // so we need to treat its offset as zero. 
@@ -3138,7 +3140,7 @@ const MachineOperand &BaseOp = MI.getOperand(BasePos); if (BaseOp.getSubReg() != 0) - return 0; - return BaseOp.getReg(); + return nullptr; + return &const_cast<MachineOperand &>(BaseOp); } /// Return the position of the base and offset operands for this instruction. 
Index: lib/Target/Hexagon/HexagonSubtarget.cpp =================================================================== --- lib/Target/Hexagon/HexagonSubtarget.cpp +++ lib/Target/Hexagon/HexagonSubtarget.cpp @@ -277,9 +277,9 @@ continue; int Offset0; unsigned Size0; - unsigned Base0 = HII.getBaseAndOffset(L0, Offset0, Size0); + MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0); // Is the access size is longer than the L1 cache line, skip the check. - if (Base0 == 0 || Size0 >= 32) + if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32) continue; // Scan only up to 32 instructions ahead (to avoid n^2 complexity). for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) { @@ -290,8 +290,9 @@ continue; int Offset1; unsigned Size1; - unsigned Base1 = HII.getBaseAndOffset(L1, Offset1, Size1); - if (Base1 == 0 || Size1 >= 32 || Base0 != Base1) + MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1); + if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 || + BaseOp0->getReg() != BaseOp1->getReg()) continue; // Check bits 3 and 4 of the offset: if they differ, a bank conflict // is unlikely. 
Index: lib/Target/Lanai/LanaiInstrInfo.h =================================================================== --- lib/Target/Lanai/LanaiInstrInfo.h +++ lib/Target/Lanai/LanaiInstrInfo.h @@ -68,13 +68,13 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; - bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool getMemOpBaseImmOfs(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; - bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemOpBaseImmOfsWidth(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; Index: lib/Target/Lanai/LanaiInstrInfo.cpp =================================================================== --- lib/Target/Lanai/LanaiInstrInfo.cpp +++ lib/Target/Lanai/LanaiInstrInfo.cpp @@ -101,12 +101,13 @@ // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned BaseRegA = 0, BaseRegB = 0; + MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && - getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { - if (BaseRegA == BaseRegB) { + if (getMemOpBaseImmOfsWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemOpBaseImmOfsWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (BaseOpA->isReg() && BaseOpB->isReg() && + BaseOpA->getReg() == BaseOpB->getReg()) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); int LowWidth = (LowOffset == OffsetA) ? 
WidthA : WidthB; @@ -755,9 +756,9 @@ return 0; } -bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth( - MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width, - const TargetRegisterInfo * /*TRI*/) const { +bool LanaiInstrInfo::getMemOpBaseImmOfsWidth( + MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, + unsigned &Width, const TargetRegisterInfo * /*TRI*/) const { // Handle only loads/stores with base register followed by immediate offset // and with add as ALU op. if (LdSt.getNumOperands() != 4) @@ -787,14 +788,15 @@ break; } - BaseReg = LdSt.getOperand(1).getReg(); + BaseOp = &LdSt.getOperand(1); Offset = LdSt.getOperand(2).getImm(); return true; } -bool LanaiInstrInfo::getMemOpBaseRegImmOfs( - MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool LanaiInstrInfo::getMemOpBaseImmOfs(MachineInstr &LdSt, + MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: return false; @@ -808,6 +810,6 @@ case Lanai::LDBs_RI: case Lanai::LDBz_RI: unsigned Width; - return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI); + return getMemOpBaseImmOfsWidth(LdSt, BaseOp, Offset, Width, TRI); } } Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -327,9 +327,9 @@ SmallVectorImpl &Cond, bool AllowModify) const override; - bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + bool getMemOpBaseImmOfs(MachineInstr &LdSt, MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; Index: lib/Target/X86/X86InstrInfo.cpp 
=================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -3257,9 +3257,9 @@ } } -bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool X86InstrInfo::getMemOpBaseImmOfs(MachineInstr &MemOp, + MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) @@ -3267,11 +3267,10 @@ MemRefBegin += X86II::getOperandBias(Desc); - MachineOperand &BaseMO = MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); - if (!BaseMO.isReg()) // Can be an MO_FrameIndex + BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); + if (!BaseOp->isReg()) // Can be an MO_FrameIndex return false; - BaseReg = BaseMO.getReg(); if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1) return false;