Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1242,6 +1242,7 @@
   virtual bool getMemOperandWithOffset(const MachineInstr &MI,
                                        const MachineOperand *&BaseOp,
                                        int64_t &Offset,
+                                       bool &OffsetIsScalable,
                                        const TargetRegisterInfo *TRI) const {
     return false;
   }
Index: llvm/lib/CodeGen/ImplicitNullChecks.cpp
===================================================================
--- llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -364,12 +364,18 @@
                                       unsigned PointerReg,
                                       ArrayRef<MachineInstr *> PrevInsts) {
   int64_t Offset;
+  bool OffsetIsScalable;
   const MachineOperand *BaseOp;
-  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
+
+  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) ||
       !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
     return SR_Unsuitable;
 
+  // FIXME: This algorithm assumes instructions have fixed-size offsets.
+  if (OffsetIsScalable)
+    return SR_Unsuitable;
+
   // We want the mem access to be issued at a sane offset from PointerReg,
   // so that if PointerReg is null then the access reliably page faults.
   if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
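Note (not part of the patch): the new out-parameter changes the hook's contract. Offset is a plain byte offset only when OffsetIsScalable is false; otherwise it is a known-minimum value that must still be multiplied by the runtime vector length. A minimal, self-contained sketch of how a generic caller is expected to react, using plain C++ stand-ins rather than the in-tree types:

```cpp
// Stand-ins for the out-parameters filled by getMemOperandWithOffset().
#include <cstdint>
#include <cstdio>

struct MemOpAddress {
  int64_t Offset;        // Bytes when !OffsetIsScalable; otherwise a
                         // multiple of the runtime vector length (vscale).
  bool OffsetIsScalable;
};

// Generic code that reasons about concrete byte addresses (as the
// ImplicitNullChecks pass above does) must reject scalable offsets,
// because their byte value is unknown until run time.
bool suitableForByteLevelReasoning(const MemOpAddress &Addr) {
  if (Addr.OffsetIsScalable)
    return false; // Mirrors the "FIXME: ... fixed-size offsets" bail-outs.
  return true;
}

int main() {
  MemOpAddress Fixed{16, false}, Scalable{16, true};
  std::printf("%d %d\n", suitableForByteLevelReasoning(Fixed),
              suitableForByteLevelReasoning(Scalable)); // prints: 1 0
}
```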
Index: llvm/lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachinePipeliner.cpp
+++ llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -693,9 +693,13 @@
   // offset, then mark the dependence as loop carried potentially.
   const MachineOperand *BaseOp1, *BaseOp2;
   int64_t Offset1, Offset2;
-  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
-      TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
+  bool Offset1IsScalable, Offset2IsScalable;
+  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+                                   Offset1IsScalable, TRI) &&
+      TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+                                   Offset2IsScalable, TRI)) {
     if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+        Offset1IsScalable == Offset2IsScalable &&
         (int)Offset1 < (int)Offset2) {
       assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
              "What happened to the chain edge?");
@@ -2058,7 +2062,12 @@
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineOperand *BaseOp;
   int64_t Offset;
-  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+  bool OffsetIsScalable;
+  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+    return false;
+
+  // FIXME: This algorithm assumes instructions have fixed-size offsets.
+  if (OffsetIsScalable)
     return false;
 
   if (!BaseOp->isReg())
@@ -2236,11 +2245,17 @@
 
   const MachineOperand *BaseOpS, *BaseOpD;
   int64_t OffsetS, OffsetD;
+  bool OffsetSIsScalable, OffsetDIsScalable;
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
-      !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
+  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
+                                    TRI) ||
+      !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
+                                    TRI))
     return true;
 
+  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
+         "Expected offsets to be byte offsets");
+
   if (!BaseOpS->isIdenticalTo(*BaseOpD))
     return true;
Index: llvm/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/lib/CodeGen/MachineScheduler.cpp
+++ llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1562,7 +1562,9 @@
   for (SUnit *SU : MemOps) {
     const MachineOperand *BaseOp;
     int64_t Offset;
-    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
+    bool OffsetIsScalable;
+    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset,
+                                     OffsetIsScalable, TRI))
       MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
   }
   if (MemOpRecords.size() < 2)
Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -764,7 +764,8 @@
 
   const MachineOperand *BaseOp;
   int64_t Offset;
-  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+  bool OffsetIsScalable;
+  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
     return false;
 
   if (!BaseOp->isReg())
Index: llvm/lib/CodeGen/ModuloSchedule.cpp
===================================================================
--- llvm/lib/CodeGen/ModuloSchedule.cpp
+++ llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -913,7 +913,12 @@
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineOperand *BaseOp;
   int64_t Offset;
-  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+  bool OffsetIsScalable;
+  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+    return false;
+
+  // FIXME: This algorithm assumes instructions have fixed-size offsets.
+  if (OffsetIsScalable)
     return false;
 
   if (!BaseOp->isReg())
Index: llvm/lib/CodeGen/TargetInstrInfo.cpp
===================================================================
--- llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1127,6 +1127,7 @@
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
   int64_t Offset;
+  bool OffsetIsScalable;
 
   // To simplify the sub-register handling, verify that we only need to
   // consider physical registers.
@@ -1165,7 +1166,12 @@
       return None;
 
     const MachineOperand *BaseOp;
-    if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+    if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
+                                      TRI))
+      return None;
+
+    // FIXME: Scalable offsets are not yet handled in the offset code below.
+    if (OffsetIsScalable)
       return None;
 
     assert(MI.getNumExplicitDefs() == 1 &&
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/TypeSize.h"
 
 #define GET_INSTRINFO_HEADER
 #include "AArch64GenInstrInfo.inc"
@@ -115,11 +116,13 @@
   bool getMemOperandWithOffset(const MachineInstr &MI,
                                const MachineOperand *&BaseOp,
                                int64_t &Offset,
+                               bool &OffsetIsScalable,
                                const TargetRegisterInfo *TRI) const override;
 
   bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
+                                    bool &OffsetIsScalable,
                                     const TargetRegisterInfo *TRI) const;
 
   /// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -129,7 +132,7 @@
   /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
   ///
   /// For unscaled instructions, \p Scale is set to 1.
-  static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
                            int64_t &MinOffset, int64_t &MaxOffset);
 
   bool shouldClusterMemOps(const MachineOperand &BaseOp1,
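Note (not part of the patch): getMemOpInfo now reports the scale as a TypeSize so that SVE's vector-length-scaled addressing can be represented. A sketch of just the pieces of the TypeSize API the patch relies on, with a hypothetical decodeOffset helper mirroring how getMemOperandWithOffsetWidth uses it; this is a stand-in class, not llvm/Support/TypeSize.h:

```cpp
#include <cstdint>

// Minimal stand-in mirroring TypeSize::Fixed / TypeSize::Scalable,
// getKnownMinSize() and isScalable() as used by the patch.
class TypeSizeSketch {
  uint64_t MinSize; // Known-minimum size; the true size is
                    // MinSize * vscale when IsScalable is true.
  bool IsScalable;

public:
  constexpr TypeSizeSketch(uint64_t MinSize, bool IsScalable)
      : MinSize(MinSize), IsScalable(IsScalable) {}
  static constexpr TypeSizeSketch Fixed(uint64_t Size) {
    return {Size, false};
  }
  static constexpr TypeSizeSketch Scalable(uint64_t MinSize) {
    return {MinSize, true};
  }
  constexpr uint64_t getKnownMinSize() const { return MinSize; }
  constexpr bool isScalable() const { return IsScalable; }
};

// Hypothetical helper showing the decode step from the patch: the
// immediate is multiplied by the known-minimum scale, and the scalable
// flag is handed to the caller instead of being folded into the offset.
int64_t decodeOffset(int64_t Imm, const TypeSizeSketch &Scale,
                     bool &OffsetIsScalable) {
  OffsetIsScalable = Scale.isScalable();
  return Imm * static_cast<int64_t>(Scale.getKnownMinSize());
}
```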
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -935,6 +935,7 @@
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
   unsigned WidthA = 0, WidthB = 0;
+  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
 
   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -948,9 +949,12 @@
   // base are identical, and the offset of a lower memory access +
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
-  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
-      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
-    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA,
+                                   OffsetAIsScalable, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB,
+                                   OffsetBIsScalable, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
+        OffsetAIsScalable == OffsetBIsScalable) {
       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -1993,17 +1997,20 @@
 bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                                const MachineOperand *&BaseOp,
                                                int64_t &Offset,
+                                               bool &OffsetIsScalable,
                                                const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
   unsigned Width;
-  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width,
+                                      OffsetIsScalable, TRI);
 }
 
 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    unsigned &Width, const TargetRegisterInfo *TRI) const {
+    unsigned &Width, bool &OffsetIsScalable,
+    const TargetRegisterInfo *TRI) const {
   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
   if (LdSt.getNumExplicitOperands() == 3) {
@@ -2022,7 +2029,7 @@
 
   // Get the scaling factor for the instruction and set the width for the
   // instruction.
-  unsigned Scale = 0;
+  TypeSize Scale(0U, false);
   int64_t Dummy1, Dummy2;
 
   // If this returns false, then it's an instruction we don't want to handle.
@@ -2034,12 +2041,13 @@
   // set to 1.
   if (LdSt.getNumExplicitOperands() == 3) {
     BaseOp = &LdSt.getOperand(1);
-    Offset = LdSt.getOperand(2).getImm() * Scale;
+    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
   } else {
     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
     BaseOp = &LdSt.getOperand(2);
-    Offset = LdSt.getOperand(3).getImm() * Scale;
+    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
   }
+  OffsetIsScalable = Scale.isScalable();
 
   if (!BaseOp->isReg() && !BaseOp->isFI())
     return false;
@@ -2055,27 +2063,28 @@
   return OfsOp;
 }
 
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                     unsigned &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) {
   const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
-    Scale = Width = 0;
+    Scale = TypeSize::Fixed(0);
+    Width = 0;
     MinOffset = MaxOffset = 0;
     return false;
   case AArch64::STRWpost:
   case AArch64::LDRWpost:
     Width = 32;
-    Scale = 4;
+    Scale = TypeSize::Fixed(4);
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDURQi:
   case AArch64::STURQi:
     Width = 16;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2085,7 +2094,7 @@
   case AArch64::STURXi:
   case AArch64::STURDi:
     Width = 8;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2095,7 +2104,7 @@
   case AArch64::STURWi:
   case AArch64::STURSi:
     Width = 4;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2106,7 +2115,7 @@
   case AArch64::STURHi:
   case AArch64::STURHHi:
     Width = 2;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2117,7 +2126,7 @@
   case AArch64::STURBi:
   case AArch64::STURBBi:
     Width = 1;
-    Scale = 1;
+    Scale = TypeSize::Fixed(1);
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2125,14 +2134,15 @@
   case AArch64::LDNPQi:
   case AArch64::STPQi:
   case AArch64::STNPQi:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 32;
     MinOffset = -64;
     MaxOffset = 63;
     break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2144,7 +2154,7 @@
   case AArch64::STPDi:
   case AArch64::STNPXi:
   case AArch64::STNPDi:
-    Scale = 8;
+    Scale = TypeSize::Fixed(8);
     Width = 16;
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -2154,7 +2164,8 @@
   case AArch64::LDRDui:
   case AArch64::STRXui:
   case AArch64::STRDui:
-    Scale = Width = 8;
+    Scale = TypeSize::Fixed(8);
+    Width = 8;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2166,7 +2177,7 @@
   case AArch64::STPSi:
   case AArch64::STNPWi:
   case AArch64::STNPSi:
-    Scale = 4;
+    Scale = TypeSize::Fixed(4);
     Width = 8;
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -2176,7 +2187,8 @@
   case AArch64::LDRSWui:
   case AArch64::STRWui:
   case AArch64::STRSui:
-    Scale = Width = 4;
+    Scale = TypeSize::Fixed(4);
+    Width = 4;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2186,7 +2198,8 @@
   case AArch64::LDRSHXui:
   case AArch64::STRHui:
   case AArch64::STRHHui:
-    Scale = Width = 2;
+    Scale = TypeSize::Fixed(2);
+    Width = 2;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
@@ -2196,13 +2209,14 @@
   case AArch64::LDRSBXui:
   case AArch64::STRBui:
   case AArch64::STRBBui:
-    Scale = Width = 1;
+    Scale = TypeSize::Fixed(1);
+    Width = 1;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
   case AArch64::ADDG:
   case AArch64::TAGPstack:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 0;
     MinOffset = 0;
     MaxOffset = 63;
     break;
@@ -2210,20 +2224,21 @@
   case AArch64::LDG:
   case AArch64::STGOffset:
   case AArch64::STZGOffset:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
-    Scale = 2;
+    Scale = TypeSize::Scalable(2);
     Width = SVEMaxBytesPerVector / 8;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDR_ZXI:
   case AArch64::STR_ZXI:
-    Scale = 16;
+    Scale = TypeSize::Scalable(16);
     Width = SVEMaxBytesPerVector;
     MinOffset = -256;
     MaxOffset = 255;
     break;
@@ -2238,20 +2253,21 @@
   case AArch64::ST1D_IMM:
     // A full vectors worth of data
     // Width = mbytes * elements
-    Scale = 16;
+    Scale = TypeSize::Scalable(16);
     Width = SVEMaxBytesPerVector;
     MinOffset = -8;
     MaxOffset = 7;
     break;
   case AArch64::ST2GOffset:
   case AArch64::STZ2GOffset:
-    Scale = 16;
+    Scale = TypeSize::Fixed(16);
     Width = 32;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::STGPi:
-    Scale = Width = 16;
+    Scale = TypeSize::Fixed(16);
+    Width = 16;
     MinOffset = -64;
     MaxOffset = 63;
     break;
@@ -3437,26 +3453,6 @@
   return nullptr;
 }
 
-static bool isSVEScaledImmInstruction(unsigned Opcode) {
-  switch (Opcode) {
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-  case AArch64::LDR_PXI:
-  case AArch64::STR_PXI:
-  case AArch64::LD1B_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST1D_IMM:
-    return true;
-  default:
-    return false;
-  }
-}
-
 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                     StackOffset &SOffset,
                                     bool *OutUseUnscaledOp,
@@ -3493,16 +3489,17 @@
   }
 
   // Get the min/max offset and the scale.
-  unsigned Scale, Width;
+  TypeSize ScaleValue(0U, false);
+  unsigned Width;
   int64_t MinOff, MaxOff;
-  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                       MaxOff))
     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
 
   // Construct the complete offset.
-  bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
-  int64_t Offset =
-      IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+  bool IsMulVL = ScaleValue.isScalable();
+  unsigned Scale = ScaleValue.getKnownMinSize();
+  int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
 
   const MachineOperand &ImmOpnd =
       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3515,9 +3512,14 @@
       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
   if (useUnscaledOp &&
-      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
+                                      MaxOff))
     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
 
+  Scale = ScaleValue.getKnownMinSize();
+  assert(IsMulVL == ScaleValue.isScalable() &&
+         "Unscaled opcode has different value for scalable");
+
   int64_t Remainder = Offset % Scale;
   assert(!(Remainder && useUnscaledOp) &&
          "Cannot have remainder when using unscaled op");
@@ -5842,23 +5844,29 @@
   if (MI.mayLoadOrStore()) {
     const MachineOperand *Base; // Filled with the base operand of MI.
     int64_t Offset;             // Filled with the offset of MI.
+    bool OffsetIsScalable;
 
     // Does it allow us to offset the base operand and is the base the
     // register SP?
-    if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
-        Base->getReg() != AArch64::SP)
+    if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
+        !Base->isReg() || Base->getReg() != AArch64::SP)
+      return false;
+
+    // Fix-up code below assumes bytes.
+    if (OffsetIsScalable)
       return false;
 
     // Find the minimum/maximum offset for this instruction and check
     // if fixing it up would be in range.
    int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
-    unsigned Scale;               // The scale to multiply the offsets by.
+    TypeSize Scale(0U, false);    // The scale to multiply the offsets by.
     unsigned DummyWidth;
     getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
 
     Offset += 16; // Update the offset to what it would be if we outlined.
-    if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+    if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
+        Offset > MaxOffset * (int64_t)Scale.getFixedSize())
      return false;

    // It's in range, so we can outline it.
@@ -6226,26 +6234,29 @@
       const MachineOperand *Base;
       unsigned Width;
       int64_t Offset;
+      bool OffsetIsScalable;
 
       // Is this a load or store with an immediate offset with SP as the base?
       if (!MI.mayLoadOrStore() ||
-          !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+          !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, OffsetIsScalable,
+                                        &RI) ||
           (Base->isReg() && Base->getReg() != AArch64::SP))
         continue;
 
       // It is, so we have to fix it up.
-      unsigned Scale;
+      TypeSize Scale(0U, false);
       int64_t Dummy1, Dummy2;
 
       MachineOperand &StackOffsetOperand =
          getMemOpBaseRegImmOfsOffsetOperand(MI);
       assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
       getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
       assert(Scale != 0 && "Unexpected opcode!");
+      assert(!OffsetIsScalable && "Expected offset to be a byte offset");
 
       // We've pushed the return address to the stack, so add 16 to the offset.
       // This is safe, since we already checked if it would overflow when we
       // checked if this instruction was legal to outline.
-      int64_t NewImm = (Offset + 16) / Scale;
+      int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
       StackOffsetOperand.setImm(NewImm);
     }
   }
Index: llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -149,7 +149,9 @@
       continue;
     const MachineOperand *BaseOp;
     int64_t Offset;
-    if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
+    bool OffsetIsScalable;
+    if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
+                                     TRI) &&
         BaseOp->isReg()) {
       Register BaseReg = BaseOp->getReg();
       if (PrevBaseReg == BaseReg) {
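Note (not part of the patch): the target implementations that follow (AMDGPU, Hexagon, Lanai, PowerPC, RISC-V, X86) all take the same shape. None of them has scalable addressing modes, so each hook unconditionally reports OffsetIsScalable = false and otherwise keeps its old logic. A hypothetical minimal version of that pattern, with invented names:

```cpp
#include <cstdint>

// Stand-in for the out-parameters of the widened hook.
struct MemOpInfoSketch {
  int64_t Offset = 0;
  bool OffsetIsScalable = false;
};

// Hypothetical target hook: a target whose addressing modes only ever
// produce byte offsets reports OffsetIsScalable = false up front, so
// generic callers can keep treating Offset as a plain byte displacement.
bool getMemOperandWithOffsetSketch(int64_t EncodedImm,
                                   MemOpInfoSketch &Info) {
  Info.OffsetIsScalable = false; // This target has no scalable vectors.
  Info.Offset = EncodedImm;      // Immediate is already a byte offset.
  return true;
}
```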
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -182,6 +182,7 @@
   bool getMemOperandWithOffset(const MachineInstr &LdSt,
                                const MachineOperand *&BaseOp,
                                int64_t &Offset,
+                               bool &OffsetIsScalable,
                                const TargetRegisterInfo *TRI) const final;
 
   bool shouldClusterMemOps(const MachineOperand &BaseOp1,
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -259,11 +259,13 @@
 bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                           const MachineOperand *&BaseOp,
                                           int64_t &Offset,
+                                          bool &OffsetIsScalable,
                                           const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
   unsigned Opc = LdSt.getOpcode();
+  OffsetIsScalable = false;
 
   if (isDS(LdSt)) {
     const MachineOperand *OffsetImm =
@@ -2527,9 +2529,10 @@
                                                   const MachineInstr &MIb) const {
   const MachineOperand *BaseOp0, *BaseOp1;
   int64_t Offset0, Offset1;
+  bool Offset0IsScalable, Offset1IsScalable;
 
-  if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
-      getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
+  if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, Offset0IsScalable, &RI) &&
+      getMemOperandWithOffset(MIb, BaseOp1, Offset1, Offset1IsScalable, &RI)) {
     if (!BaseOp0->isIdenticalTo(*BaseOp1))
       return false;
Index: llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1952,8 +1952,9 @@
     int64_t OffLatReg;
     if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
       IsLowLatencySU[i] = 1;
+      bool OffsetIsScalable;
       if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
-                                         TRI))
+                                         OffsetIsScalable, TRI))
         LowLatencyOffset[i] = OffLatReg;
     } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
       IsHighLatencySU[i] = 1;
Index: llvm/lib/Target/Hexagon/HexagonInstrInfo.h
===================================================================
--- llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -206,7 +206,7 @@
   /// Get the base register and byte offset of a load/store instr.
   bool getMemOperandWithOffset(const MachineInstr &LdSt,
                                const MachineOperand *&BaseOp,
-                               int64_t &Offset,
+                               int64_t &Offset, bool &OffsetIsScalable,
                                const TargetRegisterInfo *TRI) const override;
 
   /// Reverses the branch condition of the specified condition list,
Index: llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2942,8 +2942,9 @@
 /// Get the base register and byte offset of a load/store instr.
 bool HexagonInstrInfo::getMemOperandWithOffset(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    const TargetRegisterInfo *TRI) const {
+    bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
   unsigned AccessSize = 0;
+  OffsetIsScalable = false;
   BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
   return BaseOp != nullptr && BaseOp->isReg();
 }
Index: llvm/lib/Target/Lanai/LanaiInstrInfo.h
===================================================================
--- llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -69,12 +69,13 @@
 
   bool getMemOperandWithOffset(const MachineInstr &LdSt,
                                const MachineOperand *&BaseOp,
-                               int64_t &Offset,
+                               int64_t &Offset, bool &OffsetIsScalable,
                                const TargetRegisterInfo *TRI) const override;
 
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
+                                    bool &OffsetIsScalable,
                                     const TargetRegisterInfo *TRI) const;
 
   std::pair<unsigned, unsigned>
Index: llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
===================================================================
--- llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -102,9 +102,12 @@
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
+  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
   unsigned int WidthA = 0, WidthB = 0;
-  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
-      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA,
+                                   OffsetAIsScalable, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB,
+                                   OffsetBIsScalable, TRI)) {
     if (BaseOpA->isIdenticalTo(*BaseOpB)) {
       int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
@@ -756,7 +759,9 @@
 
 bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
+    unsigned &Width, bool &OffsetIsScalable,
+    const TargetRegisterInfo * /*TRI*/) const {
+  OffsetIsScalable = false;
   // Handle only loads/stores with base register followed by immediate offset
   // and with add as ALU op.
   if (LdSt.getNumOperands() != 4)
@@ -798,6 +803,7 @@
 bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                              const MachineOperand *&BaseOp,
                                              int64_t &Offset,
+                                             bool &OffsetIsScalable,
                                              const TargetRegisterInfo *TRI) const {
   switch (LdSt.getOpcode()) {
   default:
@@ -812,6 +818,7 @@
   case Lanai::LDBs_RI:
   case Lanai::LDBz_RI:
     unsigned Width;
-    return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+    return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width,
+                                        OffsetIsScalable, TRI);
   }
 }
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -364,6 +364,7 @@
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
+                                    bool &OffsetIsScalable,
                                     const TargetRegisterInfo *TRI) const;
 
   /// Return true if two MIs access different memory addresses and false
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -4264,7 +4264,8 @@
 // memory width. Width is the size of memory that is being loaded/stored.
 bool PPCInstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
-    unsigned &Width, const TargetRegisterInfo *TRI) const {
+    unsigned &Width, bool &OffsetIsScalable,
+    const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
@@ -4280,6 +4281,7 @@
   Width = (*LdSt.memoperands_begin())->getSize();
   Offset = LdSt.getOperand(1).getImm();
   BaseReg = &LdSt.getOperand(2);
+  OffsetIsScalable = false;
   return true;
 }
 
@@ -4300,9 +4302,14 @@
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
+  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
   unsigned int WidthA = 0, WidthB = 0;
-  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
-      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA,
+                                   OffsetAIsScalable, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB,
+                                   OffsetBIsScalable, TRI)) {
+    assert(!OffsetAIsScalable && !OffsetBIsScalable &&
+           "expected byte offsets");
     if (BaseOpA->isIdenticalTo(*BaseOpB)) {
       int LowOffset = std::min(OffsetA, OffsetB);
       int HighOffset = std::max(OffsetA, OffsetB);
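Note (not part of the patch): the PowerPC areMemAccessesTriviallyDisjoint implementation above, like the AArch64, Lanai, and RISC-V ones in this patch, uses the same overlap test: two accesses off an identical base are disjoint when the lower access ends at or before the higher one begins. Comparing the offsets is only meaningful when both are byte offsets (or, on AArch64, equally scalable), which is what the new asserts and flag comparisons enforce. Restated as a small self-contained function:

```cpp
#include <algorithm>
#include <cstdint>

// Disjointness test shared by the areMemAccessesTriviallyDisjoint()
// implementations in this patch: [LowOffset, LowOffset + LowWidth) must
// end at or before HighOffset. Both offsets must be in the same unit.
bool accessesAreDisjoint(int64_t OffsetA, unsigned WidthA,
                         int64_t OffsetB, unsigned WidthB) {
  int64_t LowOffset = std::min(OffsetA, OffsetB);
  int64_t HighOffset = std::max(OffsetA, OffsetB);
  unsigned LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + static_cast<int64_t>(LowWidth) <= HighOffset;
}
```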
Index: llvm/lib/Target/RISCV/RISCVInstrInfo.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -89,6 +89,7 @@
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
+                                    bool &OffsetIsScalable,
                                     const TargetRegisterInfo *TRI) const;
 
   bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
Index: llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -561,7 +561,8 @@
 // memory width. Width is the size of memory that is being loaded/stored.
 bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
     const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
-    unsigned &Width, const TargetRegisterInfo *TRI) const {
+    unsigned &Width, bool &OffsetIsScalable,
+    const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
@@ -579,6 +580,7 @@
   Width = (*LdSt.memoperands_begin())->getSize();
   BaseReg = &LdSt.getOperand(1);
   Offset = LdSt.getOperand(2).getImm();
+  OffsetIsScalable = false;
   return true;
 }
 
@@ -599,9 +601,14 @@
   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
+  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
   unsigned int WidthA = 0, WidthB = 0;
-  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
-      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA,
+                                   OffsetAIsScalable, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB,
+                                   OffsetBIsScalable, TRI)) {
+    assert(!OffsetAIsScalable && !OffsetBIsScalable &&
+           "Expected byte offsets");
     if (BaseOpA->isIdenticalTo(*BaseOpB)) {
       int LowOffset = std::min(OffsetA, OffsetB);
       int HighOffset = std::max(OffsetA, OffsetB);
Index: llvm/lib/Target/X86/X86InstrInfo.h
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.h
+++ llvm/lib/Target/X86/X86InstrInfo.h
@@ -293,7 +293,7 @@
 
   bool getMemOperandWithOffset(const MachineInstr &LdSt,
                                const MachineOperand *&BaseOp,
-                               int64_t &Offset,
+                               int64_t &Offset, bool &OffsetIsScalable,
                                const TargetRegisterInfo *TRI) const override;
 
   bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                               TargetInstrInfo::MachineBranchPredicate &MBP,
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3191,7 +3191,7 @@
 bool X86InstrInfo::getMemOperandWithOffset(
     const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset,
-    const TargetRegisterInfo *TRI) const {
+    bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
   const MCInstrDesc &Desc = MemOp.getDesc();
   int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
   if (MemRefBegin < 0)
@@ -3221,6 +3221,8 @@
   if (!BaseOp->isReg())
     return false;
 
+  OffsetIsScalable = false;
+
   return true;
 }
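Note (not part of the patch): for reference, what a scalable offset ultimately means to a consumer. The byte displacement of a vector-length-scaled access is only fixed once vscale (the hardware vector-length multiple) is known; until then only the known-minimum value reported through Offset can be reasoned about. A hedged sketch with vscale as a hypothetical runtime input:

```cpp
#include <cstdint>

// When OffsetIsScalable is false, Offset already is the byte
// displacement; when true, the displacement is Offset scaled by the
// runtime vector-length multiple (vscale), so it cannot be folded into
// compile-time address arithmetic.
int64_t effectiveByteOffset(int64_t Offset, bool OffsetIsScalable,
                            uint64_t vscale) {
  return OffsetIsScalable ? Offset * static_cast<int64_t>(vscale) : Offset;
}
```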