Index: llvm/include/llvm/Support/TypeSize.h
===================================================================
--- llvm/include/llvm/Support/TypeSize.h
+++ llvm/include/llvm/Support/TypeSize.h
@@ -117,17 +117,12 @@
 // StackOffset - Represent an offset with named fixed and scalable components.
 //===----------------------------------------------------------------------===//
 
-namespace NewStackOffset {
 class StackOffset;
-} // end namespace NewStackOffset
-
-template <> struct LinearPolyBaseTypeTraits<NewStackOffset::StackOffset> {
+template <> struct LinearPolyBaseTypeTraits<StackOffset> {
   using ScalarTy = int64_t;
   static constexpr unsigned Dimensions = 2;
 };
 
-namespace NewStackOffset {
-
 /// StackOffset is a class to represent an offset with 2 dimensions,
 /// named fixed and scalable, respectively. This class allows a value for both
 /// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed
@@ -151,9 +146,6 @@
   ScalarTy getScalable() const { return this->getValue(1); }
 };
 
-} // end namespace NewStackOffset
-
-
 //===----------------------------------------------------------------------===//
 // UnivariateLinearPolyBase - a base class for linear polynomials with multiple
 // dimensions, but where only one dimension can be set at any time.
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,7 +13,7 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
 
-#include "AArch64StackOffset.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
 namespace llvm {
Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -116,7 +116,6 @@
 #include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
-#include "AArch64StackOffset.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -270,7 +269,7 @@
 /// Returns the size of the entire SVE stackframe (calleesaves + spills).
 static StackOffset getSVEStackSize(const MachineFunction &MF) {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
+  return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
 }
 
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -362,15 +361,15 @@
       // Most call frames will be allocated at the start of a function so
       // this is OK, but it is a limitation that needs dealing with.
       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
-      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
-                      TII);
+      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+                      StackOffset::getFixed(Amount), TII);
     }
   } else if (CalleePopAmount != 0) {
     // If the calling convention demands that the callee pops arguments from the
     // stack, we want to add it back if we have a reserved call frame.
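The substitution applied throughout the target files is easiest to see side by side. The following standalone sketch is not part of the patch (the helper name constructionSketch and the sizes are invented for illustration); it only uses the factory functions getFixed, getScalable and get that the new llvm::StackOffset provides:

  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  void constructionSketch() {
    // Old: StackOffset(16, MVT::i8)      -- 16 fixed bytes
    StackOffset Fixed = StackOffset::getFixed(16);

    // Old: StackOffset(16, MVT::nxv1i8)  -- 16 scalable (SVE) bytes
    StackOffset Scalable = StackOffset::getScalable(16);

    // Both dimensions at once; addition and subtraction stay componentwise.
    StackOffset Mixed = StackOffset::get(/*Fixed=*/16, /*Scalable=*/16);
    (void)Fixed; (void)Scalable; (void)Mixed;
  }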
assert(CalleePopAmount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - {-(int64_t)CalleePopAmount, MVT::i8}, TII); + StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); } return MBB.erase(I); } @@ -410,7 +409,8 @@ MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { int64_t NumBytes, NumVGScaledBytes; - OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, + NumVGScaledBytes); std::string CommentBuffer = "sp"; llvm::raw_string_ostream Comment(CommentBuffer); @@ -437,7 +437,8 @@ const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &OffsetFromDefCFA) const { int64_t NumBytes, NumVGScaledBytes; - OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + OffsetFromDefCFA, NumBytes, NumVGScaledBytes); unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); @@ -493,12 +494,12 @@ StackOffset Offset; if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) { AArch64FunctionInfo *AFI = MF.getInfo(); - Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) - - StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8); + Offset = + StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - + StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); } else { - Offset = {MFI.getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea(), - MVT::i8}; + Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - + getOffsetOfLocalArea()); } unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -1099,8 +1100,8 @@ ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, - false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI && needsFrameMoves) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -1133,8 +1134,8 @@ if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, - NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( @@ -1168,8 +1169,8 @@ // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, - {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false, - NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(FPOffset), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -1279,7 +1280,7 @@ ++MBBI; CalleeSavesEnd = MBBI; - AllocateBefore = {CalleeSavedSize, MVT::nxv1i8}; + AllocateBefore = StackOffset::getScalable(CalleeSavedSize); AllocateAfter = SVEStackSize - AllocateBefore; } @@ -1311,8 +1312,8 @@ // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. 
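The SVE area above is carved up with ordinary StackOffset arithmetic. A small sketch with invented sizes (16 scalable bytes of SVE callee-saves inside a 48-scalable-byte SVE frame; the function name sveSplitSketch is not from the patch) shows what AllocateBefore and AllocateAfter work out to:

  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  void sveSplitSketch() {
    StackOffset SVEStackSize = StackOffset::getScalable(48);   // whole SVE area
    StackOffset AllocateBefore = StackOffset::getScalable(16); // SVE callee-saves
    StackOffset AllocateAfter = SVEStackSize - AllocateBefore; // SVE locals/spills
    // AllocateAfter.getScalable() == 32 and AllocateAfter.getFixed() == 0.
    (void)AllocateAfter;
  }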
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, - {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, - false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); @@ -1467,7 +1468,7 @@ const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); StackOffset TotalSize = - SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8); + SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize)); } else { // Encode the stack size of the leaf function. @@ -1654,8 +1655,9 @@ if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1679,7 +1681,8 @@ assert(IsSVECalleeSave(RestoreBegin) && IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); - StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8}; + StackOffset CalleeSavedSizeAsOffset = + StackOffset::getScalable(CalleeSavedSize); DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } @@ -1692,14 +1695,15 @@ // be reloaded. The code below will deallocate the stack space // space by moving FP -> SP. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP, - {-CalleeSavedSize, MVT::nxv1i8}, TII, + StackOffset::getScalable(-CalleeSavedSize), TII, MachineInstr::FrameDestroy); } else { if (AFI->getSVECalleeSavedStackSize()) { // Deallocate the non-SVE locals first before we can deallocate (and // restore callee saves) from the SVE area. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy); NumBytes = 0; } @@ -1732,7 +1736,7 @@ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - {StackRestoreBytes, MVT::i8}, TII, + StackOffset::getFixed(StackRestoreBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { if (HasWinCFI) { @@ -1751,13 +1755,14 @@ // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. 
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8}, - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + emitFrameOffset( + MBB, LastPopI, DL, AArch64::SP, AArch64::FP, + StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1778,7 +1783,7 @@ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - {(int64_t)AfterCSRPopSize, MVT::i8}, TII, + StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } if (HasWinCFI) @@ -1798,7 +1803,7 @@ /*PreferFP=*/ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), /*ForSimm=*/false) - .getBytes(); + .getFixed(); } int AArch64FrameLowering::getNonLocalFrameIndexReference( @@ -1806,7 +1811,8 @@ return getSEHFrameIndexOffset(MF, FI); } -static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) { +static StackOffset getFPOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = @@ -1816,12 +1822,13 @@ int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); int64_t FPAdjust = CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); - return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; + return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); } -static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8}; + return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); } int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, @@ -1830,8 +1837,8 @@ MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP - ? getFPOffset(MF, ObjectOffset).getBytes() - : getStackOffset(MF, ObjectOffset).getBytes(); + ? 
getFPOffset(MF, ObjectOffset).getFixed() + : getStackOffset(MF, ObjectOffset).getFixed(); } StackOffset AArch64FrameLowering::resolveFrameIndexReference( @@ -1854,8 +1861,8 @@ const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); - int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); - int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes(); + int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); + int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); @@ -1930,19 +1937,16 @@ "non-argument/CSR objects cannot be accessed through the frame pointer"); if (isSVE) { - int64_t OffsetFromSPToSVEArea = - MFI.getStackSize() - AFI->getCalleeSavedStackSize(); - int64_t OffsetFromFPToSVEArea = - -AFI->getCalleeSaveBaseToFrameRecordOffset(); - StackOffset FPOffset = StackOffset(OffsetFromFPToSVEArea, MVT::i8) + - StackOffset(ObjectOffset, MVT::nxv1i8); - StackOffset SPOffset = SVEStackSize + - StackOffset(ObjectOffset, MVT::nxv1i8) + - StackOffset(OffsetFromSPToSVEArea, MVT::i8); + StackOffset FPOffset = + StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); + StackOffset SPOffset = + SVEStackSize + + StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), + ObjectOffset); // Always use the FP for SVE spills if available and beneficial. if (hasFP(MF) && - (SPOffset.getBytes() || - FPOffset.getScalableBytes() < SPOffset.getScalableBytes() || + (SPOffset.getFixed() || + FPOffset.getScalable() < SPOffset.getScalable() || RegInfo->needsStackRealignment(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -1961,7 +1965,7 @@ if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return StackOffset(FPOffset, MVT::i8) + ScalableOffset; + return StackOffset::getFixed(FPOffset) + ScalableOffset; } // Use the base pointer if we have one. @@ -1978,7 +1982,7 @@ Offset -= AFI->getLocalStackSize(); } - return StackOffset(Offset, MVT::i8) + ScalableOffset; + return StackOffset::getFixed(Offset) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -2932,12 +2936,12 @@ const int64_t kMaxOffset = 255 * 16; Register BaseReg = FrameReg; - int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes(); + int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); if (BaseRegOffsetBytes < kMinOffset || BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) { Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, - {BaseRegOffsetBytes, MVT::i8}, TII); + StackOffset::getFixed(BaseRegOffsetBytes), TII); BaseReg = ScratchReg; BaseRegOffsetBytes = 0; } @@ -2994,7 +2998,7 @@ LoopI->setFlags(FrameRegUpdateFlags); int64_t ExtraBaseRegUpdate = - FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0; + FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; if (LoopSize < Size) { assert(FrameRegUpdate); assert(Size - LoopSize == 16); @@ -3098,7 +3102,7 @@ // realistically happens in function epilogue. Also, STGloop is expanded // before that pass. 
   if (InsertI != MBB->end() &&
-      canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
+      canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
                         &TotalOffset)) {
     UpdateInstr = &*InsertI++;
     LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n  "
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -15,7 +15,6 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
-#include "AArch64StackOffset.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -293,6 +292,13 @@
   Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
                                                  Register Reg) const override;
 
+  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
+                                                  int64_t &NumBytes,
+                                                  int64_t &NumPredicateVectors,
+                                                  int64_t &NumDataVectors);
+  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
+                                                  int64_t &ByteSized,
+                                                  int64_t &VGSized);
+
 #define GET_INSTRINFO_HELPER_DECLS
 #include "AArch64GenInstrInfo.inc"
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3429,6 +3429,47 @@
   });
 }
 
+void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+    const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
+  // The smallest scalable elements supported by scaled SVE addressing
+  // modes are predicates, which are 2 scalable bytes in size. So the scalable
+  // byte offset must always be a multiple of 2.
+  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+  // VGSized offsets are divided by '2', because the VG register is the
+  // number of 64bit granules as opposed to 128bit vector chunks,
+  // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+  ByteSized = Offset.getFixed();
+  VGSized = Offset.getScalable() / 2;
+}
+
+/// Returns the offset in parts to which this frame offset can be
+/// decomposed for the purpose of describing a frame offset.
+/// For non-scalable offsets this is simply its byte size.
+void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
+    int64_t &NumDataVectors) {
+  // The smallest scalable elements supported by scaled SVE addressing
+  // modes are predicates, which are 2 scalable bytes in size. So the scalable
+  // byte offset must always be a multiple of 2.
+  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+  NumBytes = Offset.getFixed();
+  NumDataVectors = 0;
+  NumPredicateVectors = Offset.getScalable() / 2;
+  // This method is used to get the offsets to adjust the frame offset.
+  // If the function requires ADDPL to be used and needs more than two ADDPL
+  // instructions, part of the offset is folded into NumDataVectors so that it
+  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+      NumPredicateVectors > 62) {
+    NumDataVectors = NumPredicateVectors / 8;
+    NumPredicateVectors -= NumDataVectors * 8;
+  }
+}
+
 // Helper function to emit a frame offset adjustment from a given
 // pointer (SrcReg), stored into DestReg.
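To make the folding rule in decomposeStackOffsetForFrameOffsets concrete, here is how two offsets built from the unit-test values further below decompose. This is a standalone check, not part of the patch; the wrapper name decompositionSketch is invented, and the expected values follow from the code above:

  #include "AArch64InstrInfo.h"
  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  void decompositionSketch() {
    int64_t Bytes, PLs, VLs;

    // 18 scalable bytes -> 9 ADDPL increments; 9 fits the ADDPL immediate and
    // is not a multiple of 8, so nothing is folded into ADDVL.
    AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
        StackOffset::getScalable(18), Bytes, PLs, VLs); // Bytes=0, PLs=9, VLs=0

    // 130 scalable bytes -> 65 ADDPL increments, which exceeds the +62 limit,
    // so 8 ADDVL increments are peeled off and a single ADDPL remains.
    AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
        StackOffset::getScalable(130), Bytes, PLs, VLs); // Bytes=0, PLs=1, VLs=8
  }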
This function is explicit // in that it requires the opcode. @@ -3538,7 +3579,8 @@ MachineInstr::MIFlag Flag, bool SetNZCV, bool NeedsWinCFI, bool *HasWinCFI) { int64_t Bytes, NumPredicateVectors, NumDataVectors; - Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( + Offset, Bytes, NumPredicateVectors, NumDataVectors); // First emit non-scalable frame offsets, or a simple 'mov'. if (Bytes || (!Offset && SrcReg != DestReg)) { @@ -3798,7 +3840,7 @@ // Construct the complete offset. bool IsMulVL = ScaleValue.isScalable(); unsigned Scale = ScaleValue.getKnownMinSize(); - int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes(); + int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); @@ -3840,11 +3882,9 @@ *OutUnscaledOp = *UnscaledOp; if (IsMulVL) - SOffset = StackOffset(Offset, MVT::nxv1i8) + - StackOffset(SOffset.getBytes(), MVT::i8); + SOffset = StackOffset::get(SOffset.getFixed(), Offset); else - SOffset = StackOffset(Offset, MVT::i8) + - StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); + SOffset = StackOffset::get(Offset, SOffset.getScalable()); return AArch64FrameOffsetCanUpdate | (SOffset ? 0 : AArch64FrameOffsetIsLegal); } @@ -3856,7 +3896,7 @@ unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); + Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -15,7 +15,6 @@ #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" @@ -525,7 +524,7 @@ Register BaseReg, int64_t Offset) const { assert(MI && "Unable to get the legal offset for nil instruction."); - StackOffset SaveOffset(Offset, MVT::i8); + StackOffset SaveOffset = StackOffset::getFixed(Offset); return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } @@ -556,7 +555,7 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const { // ARM doesn't need the general 64-bit offsets - StackOffset Off(Offset, MVT::i8); + StackOffset Off = StackOffset::getFixed(Offset); unsigned i = 0; @@ -619,9 +618,9 @@ TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, /*PreferFP=*/true, /*ForSimm=*/false); - Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8); + Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes()); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); return; } @@ -637,12 +636,11 @@ // TAGPstack must use the virtual frame register in its 3rd operand. 
const AArch64FunctionInfo *AFI = MF.getInfo(); FrameReg = MI.getOperand(3).getReg(); - Offset = {MFI.getObjectOffset(FrameIndex) + - AFI->getTaggedBasePointerOffset(), - MVT::i8}; + Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + + AFI->getTaggedBasePointerOffset()); } else if (Tagged) { - StackOffset SPOffset = { - MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8}; + StackOffset SPOffset = StackOffset::getFixed( + MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize()); if (MFI.hasVarSizedObjects() || isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) != (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) { @@ -663,8 +661,8 @@ return; } FrameReg = AArch64::SP; - Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), - MVT::i8}; + Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + + (int64_t)MFI.getStackSize()); } else { Offset = TFI->resolveFrameIndexReference( MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); Index: llvm/lib/Target/AArch64/AArch64StackOffset.h =================================================================== --- llvm/lib/Target/AArch64/AArch64StackOffset.h +++ /dev/null @@ -1,151 +0,0 @@ -//==--AArch64StackOffset.h ---------------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the StackOffset class, which is used to -// describe scalable and non-scalable offsets during frame lowering. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H -#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H - -#include "llvm/Support/MachineValueType.h" -#include "llvm/Support/TypeSize.h" -#include - -namespace llvm { - -/// StackOffset is a wrapper around scalable and non-scalable offsets and is -/// used in several functions such as 'isAArch64FrameOffsetLegal' and -/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g. -// -/// StackOffset(1, MVT::nxv16i8) -// -/// would describe an offset as being the size of a single SVE vector. -/// -/// The class also implements simple arithmetic (addition/subtraction) on these -/// offsets, e.g. -// -/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64) -// -/// describes an offset that spans the combined storage required for an SVE -/// vector and a 64bit GPR. 
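The arithmetic this removed comment describes, combining the storage of an SVE vector and a 64-bit GPR, carries over to the replacement class. A sketch of the same offset expressed both ways (illustrative only; the function name combinedOffsetSketch is not from the patch):

  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  void combinedOffsetSketch() {
    // Old: StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64)
    // New: the sizes are stated directly as scalable/fixed bytes.
    StackOffset VectorAndGPR =
        StackOffset::getScalable(16) + StackOffset::getFixed(8);
    // VectorAndGPR.getScalable() == 16 and VectorAndGPR.getFixed() == 8.
    (void)VectorAndGPR;
  }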
-class StackOffset { - int64_t Bytes; - int64_t ScalableBytes; - - explicit operator int() const; - -public: - using Part = std::pair; - - StackOffset() : Bytes(0), ScalableBytes(0) {} - - StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() { - assert(MVT(T).isByteSized() && "Offset type is not a multiple of bytes"); - *this += Part(Offset, T); - } - - StackOffset(const StackOffset &Other) - : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {} - - StackOffset &operator=(const StackOffset &) = default; - - StackOffset &operator+=(const StackOffset::Part &Other) { - const TypeSize Size = Other.second.getSizeInBits(); - if (Size.isScalable()) - ScalableBytes += Other.first * ((int64_t)Size.getKnownMinSize() / 8); - else - Bytes += Other.first * ((int64_t)Size.getFixedSize() / 8); - return *this; - } - - StackOffset &operator+=(const StackOffset &Other) { - Bytes += Other.Bytes; - ScalableBytes += Other.ScalableBytes; - return *this; - } - - StackOffset operator+(const StackOffset &Other) const { - StackOffset Res(*this); - Res += Other; - return Res; - } - - StackOffset &operator-=(const StackOffset &Other) { - Bytes -= Other.Bytes; - ScalableBytes -= Other.ScalableBytes; - return *this; - } - - StackOffset operator-(const StackOffset &Other) const { - StackOffset Res(*this); - Res -= Other; - return Res; - } - - StackOffset operator-() const { - StackOffset Res = {}; - const StackOffset Other(*this); - Res -= Other; - return Res; - } - - /// Returns the scalable part of the offset in bytes. - int64_t getScalableBytes() const { return ScalableBytes; } - - /// Returns the non-scalable part of the offset in bytes. - int64_t getBytes() const { return Bytes; } - - /// Returns the offset in parts to which this frame offset can be - /// decomposed for the purpose of describing a frame offset. - /// For non-scalable offsets this is simply its byte size. - void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors, - int64_t &NumDataVectors) const { - assert(isValid() && "Invalid frame offset"); - - NumBytes = Bytes; - NumDataVectors = 0; - NumPredicateVectors = ScalableBytes / 2; - // This method is used to get the offsets to adjust the frame offset. - // If the function requires ADDPL to be used and needs more than two ADDPL - // instructions, part of the offset is folded into NumDataVectors so that it - // uses ADDVL for part of it, reducing the number of ADDPL instructions. - if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || - NumPredicateVectors > 62) { - NumDataVectors = NumPredicateVectors / 8; - NumPredicateVectors -= NumDataVectors * 8; - } - } - - void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const { - assert(isValid() && "Invalid frame offset"); - - // VGSized offsets are divided by '2', because the VG register is the - // the number of 64bit granules as opposed to 128bit vector chunks, - // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. - // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. - // VG = n * 2 and the dwarf offset must be VG * 8 bytes. - ByteSized = Bytes; - VGSized = ScalableBytes / 2; - } - - /// Returns whether the offset is known zero. - explicit operator bool() const { return Bytes || ScalableBytes; } - - bool isValid() const { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. 
- return ScalableBytes % 2 == 0; - } -}; - -} // end namespace llvm - -#endif Index: llvm/unittests/Target/AArch64/CMakeLists.txt =================================================================== --- llvm/unittests/Target/AArch64/CMakeLists.txt +++ llvm/unittests/Target/AArch64/CMakeLists.txt @@ -19,5 +19,5 @@ add_llvm_target_unittest(AArch64Tests InstSizes.cpp - TestStackOffset.cpp + DecomposeStackOffsetTest.cpp ) Index: llvm/unittests/Target/AArch64/DecomposeStackOffsetTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/Target/AArch64/DecomposeStackOffsetTest.cpp @@ -0,0 +1,53 @@ +//===- TestStackOffset.cpp - StackOffset unit tests------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TypeSize.h" +#include "AArch64InstrInfo.h" +#include "gtest/gtest.h" + +using namespace llvm; + +TEST(StackOffset, decomposeStackOffsetForFrameOffsets) { + StackOffset A = StackOffset::getFixed(8); + StackOffset B = StackOffset::getFixed(4); + StackOffset C = StackOffset::getScalable(16); + + // If all offsets can be materialized with only ADDVL, + // make sure PLSized is 0. + int64_t ByteSized, VLSized, PLSized; + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(A + B + C, ByteSized, PLSized, + VLSized); + EXPECT_EQ(12, ByteSized); + EXPECT_EQ(1, VLSized); + EXPECT_EQ(0, PLSized); + + // If we need an ADDPL to materialize the offset, and the number of scalable + // bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized. + StackOffset D = StackOffset::getScalable(2); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(C + D, ByteSized, PLSized, VLSized); + EXPECT_EQ(0, ByteSized); + EXPECT_EQ(0, VLSized); + EXPECT_EQ(9, PLSized); + + StackOffset E = StackOffset::getScalable(64); + StackOffset F = StackOffset::getScalable(2); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(E + F, ByteSized, PLSized, VLSized); + EXPECT_EQ(0, ByteSized); + EXPECT_EQ(0, VLSized); + EXPECT_EQ(33, PLSized); + + // If the offset requires an ADDPL instruction to materialize, and would + // require more than two instructions, decompose it into both + // ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions. + StackOffset G = StackOffset::getScalable(128); + StackOffset H = StackOffset::getScalable(2); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(G + H, ByteSized, PLSized, VLSized); + EXPECT_EQ(0, ByteSized); + EXPECT_EQ(8, VLSized); + EXPECT_EQ(1, PLSized); +} Index: llvm/unittests/Target/AArch64/TestStackOffset.cpp =================================================================== --- llvm/unittests/Target/AArch64/TestStackOffset.cpp +++ /dev/null @@ -1,131 +0,0 @@ -//===- TestStackOffset.cpp - StackOffset unit tests------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "AArch64StackOffset.h" -#include "gtest/gtest.h" - -using namespace llvm; - -TEST(StackOffset, MixedSize) { - StackOffset A(1, MVT::i8); - EXPECT_EQ(1, A.getBytes()); - - StackOffset B(2, MVT::i32); - EXPECT_EQ(8, B.getBytes()); - - StackOffset C(2, MVT::v4i64); - EXPECT_EQ(64, C.getBytes()); - - StackOffset D(2, MVT::nxv4i64); - EXPECT_EQ(64, D.getScalableBytes()); - - StackOffset E(2, MVT::v4i64); - EXPECT_EQ(0, E.getScalableBytes()); - - StackOffset F(2, MVT::nxv4i64); - EXPECT_EQ(0, F.getBytes()); -} - -TEST(StackOffset, Add) { - StackOffset A(1, MVT::i64); - StackOffset B(1, MVT::i32); - StackOffset C = A + B; - EXPECT_EQ(12, C.getBytes()); - - StackOffset D(1, MVT::i32); - D += A; - EXPECT_EQ(12, D.getBytes()); - - StackOffset E(1, MVT::nxv1i32); - StackOffset F = C + E; - EXPECT_EQ(12, F.getBytes()); - EXPECT_EQ(4, F.getScalableBytes()); -} - -TEST(StackOffset, Sub) { - StackOffset A(1, MVT::i64); - StackOffset B(1, MVT::i32); - StackOffset C = A - B; - EXPECT_EQ(4, C.getBytes()); - - StackOffset D(1, MVT::i64); - D -= A; - EXPECT_EQ(0, D.getBytes()); - - C += StackOffset(2, MVT::nxv1i32); - StackOffset E = StackOffset(1, MVT::nxv1i32); - StackOffset F = C - E; - EXPECT_EQ(4, F.getBytes()); - EXPECT_EQ(4, F.getScalableBytes()); -} - -TEST(StackOffset, isZero) { - StackOffset A(0, MVT::i64); - StackOffset B(0, MVT::i32); - EXPECT_TRUE(!A); - EXPECT_TRUE(!(A + B)); - - StackOffset C(0, MVT::nxv1i32); - EXPECT_TRUE(!(A + C)); - - StackOffset D(1, MVT::nxv1i32); - EXPECT_FALSE(!(A + D)); -} - -TEST(StackOffset, isValid) { - EXPECT_FALSE(StackOffset(1, MVT::nxv8i1).isValid()); - EXPECT_TRUE(StackOffset(2, MVT::nxv8i1).isValid()); - -#ifndef NDEBUG -#ifdef GTEST_HAS_DEATH_TEST - EXPECT_DEATH(StackOffset(1, MVT::i1), - "Offset type is not a multiple of bytes"); - EXPECT_DEATH(StackOffset(1, MVT::nxv1i1), - "Offset type is not a multiple of bytes"); -#endif // defined GTEST_HAS_DEATH_TEST -#endif // not defined NDEBUG -} - -TEST(StackOffset, getForFrameOffset) { - StackOffset A(1, MVT::i64); - StackOffset B(1, MVT::i32); - StackOffset C(1, MVT::nxv4i32); - - // If all offsets can be materialized with only ADDVL, - // make sure PLSized is 0. - int64_t ByteSized, VLSized, PLSized; - (A + B + C).getForFrameOffset(ByteSized, PLSized, VLSized); - EXPECT_EQ(12, ByteSized); - EXPECT_EQ(1, VLSized); - EXPECT_EQ(0, PLSized); - - // If we need an ADDPL to materialize the offset, and the number of scalable - // bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized. - StackOffset D(1, MVT::nxv16i1); - (C + D).getForFrameOffset(ByteSized, PLSized, VLSized); - EXPECT_EQ(0, ByteSized); - EXPECT_EQ(0, VLSized); - EXPECT_EQ(9, PLSized); - - StackOffset E(4, MVT::nxv4i32); - StackOffset F(1, MVT::nxv16i1); - (E + F).getForFrameOffset(ByteSized, PLSized, VLSized); - EXPECT_EQ(0, ByteSized); - EXPECT_EQ(0, VLSized); - EXPECT_EQ(33, PLSized); - - // If the offset requires an ADDPL instruction to materialize, and would - // require more than two instructions, decompose it into both - // ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions. - StackOffset G(8, MVT::nxv4i32); - StackOffset H(1, MVT::nxv16i1); - (G + H).getForFrameOffset(ByteSized, PLSized, VLSized); - EXPECT_EQ(0, ByteSized); - EXPECT_EQ(8, VLSized); - EXPECT_EQ(1, PLSized); -}
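The replacement unit test only exercises decomposeStackOffsetForFrameOffsets. An analogous check for the DWARF decomposition, written in the same gtest style, might look like the following; this is a sketch and not part of the patch, with the expected values derived from the ByteSized/VGSized computation added to AArch64InstrInfo.cpp:

  #include "AArch64InstrInfo.h"
  #include "llvm/Support/TypeSize.h"
  #include "gtest/gtest.h"

  using namespace llvm;

  TEST(StackOffset, decomposeStackOffsetForDwarfOffsets) {
    // 8 fixed bytes plus one full SVE data vector (16 scalable bytes).
    StackOffset A = StackOffset::getFixed(8) + StackOffset::getScalable(16);

    int64_t ByteSized, VGSized;
    AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(A, ByteSized, VGSized);
    EXPECT_EQ(8, ByteSized);
    // Scalable bytes are halved: VG counts 64-bit granules, so 16 scalable
    // bytes correspond to 8 * VG in the DWARF expression.
    EXPECT_EQ(8, VGSized);
  }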