diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1242,6 +1242,7 @@ /// to handle a single base operand. bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, + bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const; /// Get the base operands and byte offset of an instruction that reads/writes @@ -1250,9 +1251,11 @@ /// It returns false if no base operands and offset was found. /// It is not guaranteed to always recognize base operands and offsets in all /// cases. - virtual bool getMemOperandsWithOffset( - const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + virtual bool + getMemOperandsWithOffset(const MachineInstr &MI, + SmallVectorImpl &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, + const TargetRegisterInfo *TRI) const { return false; } diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -364,12 +364,18 @@ unsigned PointerReg, ArrayRef PrevInsts) { int64_t Offset; + bool OffsetIsScalable; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || + + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) || !BaseOp->isReg() || BaseOp->getReg() != PointerReg) return SR_Unsuitable; + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) + return SR_Unsuitable; + // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. 
if (!(MI.mayLoadOrStore() && !MI.isPredicable() && diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -693,9 +693,13 @@ // offset, then mark the dependence as loop carried potentially. const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && - TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + bool Offset1IsScalable, Offset2IsScalable; + if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, + Offset1IsScalable, TRI) && + TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, + Offset2IsScalable, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && + Offset1IsScalable == Offset2IsScalable && (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); @@ -2058,7 +2062,12 @@ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return false; + + // FIXME: This algorithm assumes instructions have fixed-size offsets. 
+ if (OffsetIsScalable) return false; if (!BaseOp->isReg()) @@ -2236,11 +2245,17 @@ const MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; + bool OffsetSIsScalable, OffsetDIsScalable; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) + if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable, + TRI) || + !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable, + TRI)) return true; + assert(!OffsetSIsScalable && !OffsetDIsScalable && + "Expected offsets to be byte offsets"); + if (!BaseOpS->isIdenticalTo(*BaseOpD)) return true; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1567,7 +1567,9 @@ for (SUnit *SU : MemOps) { SmallVector BaseOps; int64_t Offset; - if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, TRI)) + bool OffsetIsScalable; + if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, + OffsetIsScalable, TRI)) MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset)); #ifndef NDEBUG for (auto *Op : BaseOps) diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -764,7 +764,8 @@ const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) return false; if (!BaseOp->isReg()) diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -913,7 +913,12 @@ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand 
*BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return false; + + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) return false; if (!BaseOp->isReg()) diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1031,9 +1031,9 @@ // Default implementation of getMemOperandWithOffset. bool TargetInstrInfo::getMemOperandWithOffset( const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, - const TargetRegisterInfo *TRI) const { + bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { SmallVector BaseOps; - if (!getMemOperandsWithOffset(MI, BaseOps, Offset, TRI) || + if (!getMemOperandsWithOffset(MI, BaseOps, Offset, OffsetIsScalable, TRI) || BaseOps.size() != 1) return false; BaseOp = BaseOps.front(); @@ -1137,6 +1137,7 @@ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); int64_t Offset; + bool OffsetIsScalable; // To simplify the sub-register handling, verify that we only need to // consider physical registers. @@ -1175,7 +1176,12 @@ return None; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, + TRI)) + return None; + + // FIXME: Scalable offsets are not yet handled in the offset code below. + if (OffsetIsScalable) return None; // TODO: Can currently only handle mem instructions with a single define. 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/TypeSize.h" #define GET_INSTRINFO_HEADER #include "AArch64GenInstrInfo.inc" @@ -114,11 +115,17 @@ bool getMemOperandsWithOffset( const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const override; + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) + const override; + /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`. + /// This is true for some SVE instructions like ldr/str that have a + /// 'reg + imm' addressing mode where the immediate is an index to the + /// scalable vector located at 'reg + imm * vscale x #bytes'. bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, + unsigned &Width, const TargetRegisterInfo *TRI) const; /// Return the immediate offset of the base register in a load/store \p LdSt. @@ -128,7 +135,7 @@ /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly. /// /// For unscaled instructions, \p Scale is set to 1. 
- static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, + static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset); bool shouldClusterMemOps(ArrayRef BaseOps1, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -942,6 +942,7 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned WidthA = 0, WidthB = 0; + bool OffsetAIsScalable = false, OffsetBIsScalable = false; assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); @@ -955,9 +956,14 @@ // base are identical, and the offset of a lower memory access + // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { - if (BaseOpA->isIdenticalTo(*BaseOpB)) { + // If OffsetAIsScalable and OffsetBIsScalable are both true, they + // are assumed to have the same scale (vscale). + if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable, + WidthA, TRI) && + getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable, + WidthB, TRI)) { + if (BaseOpA->isIdenticalTo(*BaseOpB) && + OffsetAIsScalable == OffsetBIsScalable) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; int LowWidth = (LowOffset == OffsetA) ? 
WidthA : WidthB; @@ -1999,13 +2005,15 @@ bool AArch64InstrInfo::getMemOperandsWithOffset( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) + const { if (!LdSt.mayLoadOrStore()) return false; const MachineOperand *BaseOp; unsigned Width; - if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable, + Width, TRI)) return false; BaseOps.push_back(BaseOp); return true; @@ -2013,7 +2021,8 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo *TRI) const { + bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const { assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() == 3) { @@ -2032,7 +2041,7 @@ // Get the scaling factor for the instruction and set the width for the // instruction. - unsigned Scale = 0; + TypeSize Scale(0U, false); int64_t Dummy1, Dummy2; // If this returns false, then it's an instruction we don't want to handle. @@ -2044,12 +2053,13 @@ // set to 1. 
if (LdSt.getNumExplicitOperands() == 3) { BaseOp = &LdSt.getOperand(1); - Offset = LdSt.getOperand(2).getImm() * Scale; + Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize(); } else { assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); BaseOp = &LdSt.getOperand(2); - Offset = LdSt.getOperand(3).getImm() * Scale; + Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize(); } + OffsetIsScalable = Scale.isScalable(); if (!BaseOp->isReg() && !BaseOp->isFI()) return false; @@ -2065,27 +2075,28 @@ return OfsOp; } -bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, +bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset) { const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8; switch (Opcode) { // Not a memory operation or something we want to handle. default: - Scale = Width = 0; + Scale = TypeSize::Fixed(0); + Width = 0; MinOffset = MaxOffset = 0; return false; case AArch64::STRWpost: case AArch64::LDRWpost: Width = 32; - Scale = 4; + Scale = TypeSize::Fixed(4); MinOffset = -256; MaxOffset = 255; break; case AArch64::LDURQi: case AArch64::STURQi: Width = 16; - Scale = 1; + Scale = TypeSize::Fixed(1); MinOffset = -256; MaxOffset = 255; break; @@ -2095,7 +2106,7 @@ case AArch64::STURXi: case AArch64::STURDi: Width = 8; - Scale = 1; + Scale = TypeSize::Fixed(1); MinOffset = -256; MaxOffset = 255; break; @@ -2105,7 +2116,7 @@ case AArch64::STURWi: case AArch64::STURSi: Width = 4; - Scale = 1; + Scale = TypeSize::Fixed(1); MinOffset = -256; MaxOffset = 255; break; @@ -2116,7 +2127,7 @@ case AArch64::STURHi: case AArch64::STURHHi: Width = 2; - Scale = 1; + Scale = TypeSize::Fixed(1); MinOffset = -256; MaxOffset = 255; break; @@ -2127,7 +2138,7 @@ case AArch64::STURBi: case AArch64::STURBBi: Width = 1; - Scale = 1; + Scale = TypeSize::Fixed(1); MinOffset = -256; MaxOffset = 255; break; @@ -2135,14 +2146,15 @@ case AArch64::LDNPQi: 
case AArch64::STPQi: case AArch64::STNPQi: - Scale = 16; + Scale = TypeSize::Fixed(16); Width = 32; MinOffset = -64; MaxOffset = 63; break; case AArch64::LDRQui: case AArch64::STRQui: - Scale = Width = 16; + Scale = TypeSize::Fixed(16); + Width = 16; MinOffset = 0; MaxOffset = 4095; break; @@ -2154,7 +2166,7 @@ case AArch64::STPDi: case AArch64::STNPXi: case AArch64::STNPDi: - Scale = 8; + Scale = TypeSize::Fixed(8); Width = 16; MinOffset = -64; MaxOffset = 63; @@ -2164,7 +2176,8 @@ case AArch64::LDRDui: case AArch64::STRXui: case AArch64::STRDui: - Scale = Width = 8; + Scale = TypeSize::Fixed(8); + Width = 8; MinOffset = 0; MaxOffset = 4095; break; @@ -2176,7 +2189,7 @@ case AArch64::STPSi: case AArch64::STNPWi: case AArch64::STNPSi: - Scale = 4; + Scale = TypeSize::Fixed(4); Width = 8; MinOffset = -64; MaxOffset = 63; @@ -2186,7 +2199,8 @@ case AArch64::LDRSWui: case AArch64::STRWui: case AArch64::STRSui: - Scale = Width = 4; + Scale = TypeSize::Fixed(4); + Width = 4; MinOffset = 0; MaxOffset = 4095; break; @@ -2196,7 +2210,8 @@ case AArch64::LDRSHXui: case AArch64::STRHui: case AArch64::STRHHui: - Scale = Width = 2; + Scale = TypeSize::Fixed(2); + Width = 2; MinOffset = 0; MaxOffset = 4095; break; @@ -2206,18 +2221,19 @@ case AArch64::LDRSBXui: case AArch64::STRBui: case AArch64::STRBBui: - Scale = Width = 1; + Scale = TypeSize::Fixed(1); + Width = 1; MinOffset = 0; MaxOffset = 4095; break; case AArch64::ADDG: - Scale = 16; + Scale = TypeSize::Fixed(16); Width = 0; MinOffset = 0; MaxOffset = 63; break; case AArch64::TAGPstack: - Scale = 16; + Scale = TypeSize::Fixed(16); Width = 0; // TAGP with a negative offset turns into SUBP, which has a maximum offset // of 63 (not 64!). 
@@ -2227,20 +2243,21 @@ case AArch64::LDG: case AArch64::STGOffset: case AArch64::STZGOffset: - Scale = Width = 16; + Scale = TypeSize::Fixed(16); + Width = 16; MinOffset = -256; MaxOffset = 255; break; case AArch64::LDR_PXI: case AArch64::STR_PXI: - Scale = 2; + Scale = TypeSize::Scalable(2); Width = SVEMaxBytesPerVector / 8; MinOffset = -256; MaxOffset = 255; break; case AArch64::LDR_ZXI: case AArch64::STR_ZXI: - Scale = 16; + Scale = TypeSize::Scalable(16); Width = SVEMaxBytesPerVector; MinOffset = -256; MaxOffset = 255; @@ -2255,20 +2272,21 @@ case AArch64::ST1D_IMM: // A full vectors worth of data // Width = mbytes * elements - Scale = 16; + Scale = TypeSize::Scalable(16); Width = SVEMaxBytesPerVector; MinOffset = -8; MaxOffset = 7; break; case AArch64::ST2GOffset: case AArch64::STZ2GOffset: - Scale = 16; + Scale = TypeSize::Fixed(16); Width = 32; MinOffset = -256; MaxOffset = 255; break; case AArch64::STGPi: - Scale = Width = 16; + Scale = TypeSize::Fixed(16); + Width = 16; MinOffset = -64; MaxOffset = 63; break; @@ -3457,26 +3475,6 @@ return nullptr; } -static bool isSVEScaledImmInstruction(unsigned Opcode) { - switch (Opcode) { - case AArch64::LDR_ZXI: - case AArch64::STR_ZXI: - case AArch64::LDR_PXI: - case AArch64::STR_PXI: - case AArch64::LD1B_IMM: - case AArch64::LD1H_IMM: - case AArch64::LD1W_IMM: - case AArch64::LD1D_IMM: - case AArch64::ST1B_IMM: - case AArch64::ST1H_IMM: - case AArch64::ST1W_IMM: - case AArch64::ST1D_IMM: - return true; - default: - return false; - } -} - int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &SOffset, bool *OutUseUnscaledOp, @@ -3515,16 +3513,17 @@ } // Get the min/max offset and the scale. 
- unsigned Scale, Width; + TypeSize ScaleValue(0U, false); + unsigned Width; int64_t MinOff, MaxOff; - if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff, + if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff, MaxOff)) llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); // Construct the complete offset. - bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode()); - int64_t Offset = - IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes()); + bool IsMulVL = ScaleValue.isScalable(); + unsigned Scale = ScaleValue.getKnownMinSize(); + int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes(); const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); @@ -3537,9 +3536,14 @@ AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); if (useUnscaledOp && - !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff)) + !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff, + MaxOff)) llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); + Scale = ScaleValue.getKnownMinSize(); + assert(IsMulVL == ScaleValue.isScalable() && + "Unscaled opcode has different value for scalable"); + int64_t Remainder = Offset % Scale; assert(!(Remainder && useUnscaledOp) && "Cannot have remainder when using unscaled op"); @@ -5864,23 +5868,29 @@ if (MI.mayLoadOrStore()) { const MachineOperand *Base; // Filled with the base operand of MI. int64_t Offset; // Filled with the offset of MI. + bool OffsetIsScalable; // Does it allow us to offset the base operand and is the base the // register SP? 
- if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() || - Base->getReg() != AArch64::SP) + if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) || + !Base->isReg() || Base->getReg() != AArch64::SP) + return false; + + // Fix-up code below assumes bytes. + if (OffsetIsScalable) return false; // Find the minimum/maximum offset for this instruction and check // if fixing it up would be in range. int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction. - unsigned Scale; // The scale to multiply the offsets by. + TypeSize Scale(0U, false); // The scale to multiply the offsets by. unsigned DummyWidth; getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); Offset += 16; // Update the offset to what it would be if we outlined. - if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale) + if (Offset < MinOffset * (int64_t)Scale.getFixedSize() || + Offset > MaxOffset * (int64_t)Scale.getFixedSize()) return false; // It's in range, so we can outline it. @@ -6258,26 +6268,29 @@ const MachineOperand *Base; unsigned Width; int64_t Offset; + bool OffsetIsScalable; // Is this a load or store with an immediate offset with SP as the base? if (!MI.mayLoadOrStore() || - !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) || + !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width, + &RI) || (Base->isReg() && Base->getReg() != AArch64::SP)) continue; // It is, so we have to fix it up. - unsigned Scale; + TypeSize Scale(0U, false); int64_t Dummy1, Dummy2; MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); assert(Scale != 0 && "Unexpected opcode!"); + assert(!OffsetIsScalable && "Expected offset to be a byte offset"); // We've pushed the return address to the stack, so add 16 to the offset. 
// This is safe, since we already checked if it would overflow when we // checked if this instruction was legal to outline. - int64_t NewImm = (Offset + 16) / Scale; + int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize(); StackOffsetOperand.setImm(NewImm); } } diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp --- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -149,7 +149,9 @@ continue; const MachineOperand *BaseOp; int64_t Offset; - if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) && + bool OffsetIsScalable; + if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, + TRI) && BaseOp->isReg()) { Register BaseReg = BaseOp->getReg(); if (PrevBaseReg == BaseReg) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -184,7 +184,7 @@ bool getMemOperandsWithOffset(const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const final; bool shouldClusterMemOps(ArrayRef BaseOps1, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -260,11 +260,13 @@ bool SIInstrInfo::getMemOperandsWithOffset( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) + const { if (!LdSt.mayLoadOrStore()) return false; unsigned Opc = LdSt.getOpcode(); + OffsetIsScalable = false; const MachineOperand *BaseOp, *OffsetOp; if (isDS(LdSt)) { @@ -2610,8 +2612,9 @@ const MachineInstr &MIb) const { SmallVector BaseOps0, BaseOps1; int64_t Offset0, Offset1; - if 
(!getMemOperandsWithOffset(MIa, BaseOps0, Offset0, &RI) || - !getMemOperandsWithOffset(MIb, BaseOps1, Offset1, &RI)) + bool Offset0IsScalable, Offset1IsScalable; + if (!getMemOperandsWithOffset(MIa, BaseOps0, Offset0, Offset0IsScalable, &RI) || + !getMemOperandsWithOffset(MIb, BaseOps1, Offset1, Offset1IsScalable, &RI)) return false; if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1)) diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1952,8 +1952,9 @@ int64_t OffLatReg; if (SITII->isLowLatencyInstruction(*SU->getInstr())) { IsLowLatencySU[i] = 1; + bool OffsetIsScalable; if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, - TRI)) + OffsetIsScalable, TRI)) LowLatencyOffset[i] = OffLatReg; } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode())) IsHighLatencySU[i] = 1; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -207,7 +207,7 @@ bool getMemOperandsWithOffset(const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const override; /// Reverses the branch condition of the specified condition list, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2948,8 +2948,9 @@ /// Get the base register and byte offset of a load/store instr. 
bool HexagonInstrInfo::getMemOperandsWithOffset( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { unsigned AccessSize = 0; + OffsetIsScalable = false; const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize); if (!BaseOp || !BaseOp->isReg()) return false; diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -70,7 +70,7 @@ bool getMemOperandsWithOffset(const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const override; bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -797,7 +797,7 @@ bool LanaiInstrInfo::getMemOperandsWithOffset( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: return false; @@ -812,6 +812,7 @@ case Lanai::LDBz_RI: const MachineOperand *BaseOp; unsigned Width; + OffsetIsScalable = false; if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) return false; BaseOps.push_back(BaseOp); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -294,7 +294,7 @@ bool getMemOperandsWithOffset(const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const override; bool 
analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3191,7 +3191,7 @@ bool X86InstrInfo::getMemOperandsWithOffset( const MachineInstr &MemOp, SmallVectorImpl &BaseOps, - int64_t &Offset, const TargetRegisterInfo *TRI) const { + int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) @@ -3222,6 +3222,7 @@ if (!BaseOp->isReg()) return false; + OffsetIsScalable = false; BaseOps.push_back(BaseOp); return true; }