diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -553,7 +553,7 @@ void setStackSize(uint64_t Size) { StackSize = Size; } /// Estimate and return the size of the stack frame. - unsigned estimateStackSize(const MachineFunction &MF) const; + uint64_t estimateStackSize(const MachineFunction &MF) const; /// Return the correction for frame offsets. int getOffsetAdjustment() const { return OffsetAdjustment; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -225,12 +225,12 @@ if (FrameIndices.find(&AI) != FrameIndices.end()) return FrameIndices[&AI]; - unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); - unsigned Size = + uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); + uint64_t Size = ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); // Always allocate at least one byte. - Size = std::max(Size, 1u); + Size = std::max<uint64_t>(Size, 1u); unsigned Alignment = AI.getAlignment(); if (!Alignment) diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -133,11 +133,11 @@ return BV; } -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { +uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; + int64_t Offset = 0; // This code is very, very similar to PEI::calculateFrameObjectOffsets(). // It really should be refactored to share code. 
Until then, changes @@ -147,7 +147,7 @@ // Only estimate stack size of default stack. if (getStackID(i) != TargetStackID::Default) continue; - int FixedOff = -getObjectOffset(i); + int64_t FixedOff = -getObjectOffset(i); if (FixedOff > Offset) Offset = FixedOff; } for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { @@ -183,7 +183,7 @@ unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - return (unsigned)Offset; + return (uint64_t)Offset; } void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -44,7 +44,7 @@ unsigned &FrameReg, bool PreferFP, bool ForSimm) const; StackOffset resolveFrameOffsetReference(const MachineFunction &MF, - int ObjectOffset, bool isFixed, + int64_t ObjectOffset, bool isFixed, bool isSVE, unsigned &FrameReg, bool PreferFP, bool ForSimm) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -101,7 +101,7 @@ private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, - unsigned StackBumpBytes) const; + uint64_t StackBumpBytes) const; int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const; int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF, diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -227,7 +227,7 @@ const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - unsigned NumBytes = AFI->getLocalStackSize(); + uint64_t NumBytes = AFI->getLocalStackSize(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 || getSVEStackSize(MF)); @@ -429,7 +429,7 @@ } static bool windowsRequiresStackProbe(MachineFunction &MF, - 
unsigned StackSizeInBytes) { + uint64_t StackSizeInBytes) { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); if (!Subtarget.isTargetWindows()) return false; @@ -446,7 +446,7 @@ } bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( - MachineFunction &MF, unsigned StackBumpBytes) const { + MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); @@ -725,7 +725,7 @@ // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, - unsigned LocalStackSize, + uint64_t LocalStackSize, bool NeedsWinCFI, bool *HasWinCFI) { if (AArch64InstrInfo::isSEHInstruction(MI)) @@ -913,8 +913,8 @@ // pointer from the funclet. We only save the callee saved registers in the // funclet, which are really the callee saved registers of the parent // function, including the funclet. - int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) - : (int)MFI.getStackSize(); + int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) + : MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); assert(!SVEStackSize && @@ -1016,7 +1016,7 @@ if (HasFP) { // Only set up FP if we actually need to. - int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; + int64_t FPOffset = isTargetDarwin(MF) ? 
(AFI->getCalleeSavedStackSize() - 16) : 0; if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1031,7 +1031,7 @@ } if (windowsRequiresStackProbe(MF, NumBytes)) { - uint32_t NumWords = NumBytes >> 4; + uint64_t NumWords = NumBytes >> 4; if (NeedsWinCFI) { HasWinCFI = true; // alloc_l can hold at most 256MB, so assume that NumBytes doesn't @@ -1388,8 +1388,8 @@ IsFunclet = isFuncletReturnInstr(*MBBI); } - int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) - : MFI.getStackSize(); + int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) + : MFI.getStackSize(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no @@ -1578,7 +1578,7 @@ return; bool NoCalleeSaveRestore = PrologueSaveSize == 0; - int StackRestoreBytes = RedZone ? 0 : NumBytes; + int64_t StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += AfterCSRPopSize; @@ -1670,7 +1670,7 @@ return getSEHFrameIndexOffset(MF, FI); } -static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) { const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); bool IsWin64 = @@ -1681,9 +1681,9 @@ return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } -static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8}; + return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8}; } int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, @@ -1700,7 +1700,7 @@ const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); - int ObjectOffset = MFI.getObjectOffset(FI); + int64_t ObjectOffset = MFI.getObjectOffset(FI); 
bool isFixed = MFI.isFixedObjectIndex(FI); bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector; return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, @@ -1708,7 +1708,7 @@ } StackOffset AArch64FrameLowering::resolveFrameOffsetReference( - const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE, + const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, unsigned &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( @@ -1716,8 +1716,8 @@ const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - int FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); - int Offset = getStackOffset(MF, ObjectOffset).getBytes(); + int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); + int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); @@ -2402,7 +2402,7 @@ unsigned NumSavedRegs = SavedRegs.count(); // The frame record needs to be created by saving the appropriate registers - unsigned EstimatedStackSize = MFI.estimateStackSize(MF); + uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); if (hasFP(MF) || windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { SavedRegs.set(AArch64::FP); @@ -2465,7 +2465,7 @@ // Adding the size of additional 64bit GPR saves. 
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); - unsigned AlignedCSStackSize = alignTo(CSStackSize, 16); + uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16); LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << EstimatedStackSize + AlignedCSStackSize << " bytes.\n"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3109,7 +3109,7 @@ const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; do { - unsigned ThisVal = std::min<unsigned>(Offset, MaxEncodableValue); + uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue); unsigned LocalShiftSize = 0; if (ThisVal > MaxEncoding) { ThisVal = ThisVal >> ShiftSize; diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -52,7 +52,7 @@ bool HasStackFrame = false; /// Amount of stack frame size, not including callee-saved registers. - unsigned LocalStackSize = 0; + uint64_t LocalStackSize = 0; /// The start and end frame indices for the SVE callee saves. 
int MinSVECSFrameIndex = 0; @@ -170,8 +170,8 @@ bool isSplitCSR() const { return IsSplitCSR; } void setIsSplitCSR(bool s) { IsSplitCSR = s; } - void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } - unsigned getLocalStackSize() const { return LocalStackSize; } + void setLocalStackSize(uint64_t Size) { LocalStackSize = Size; } + uint64_t getLocalStackSize() const { return LocalStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -397,7 +397,6 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const { - assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); StackOffset SaveOffset(Offset, MVT::i8); return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; diff --git a/llvm/test/CodeGen/AArch64/large-stack.ll b/llvm/test/CodeGen/AArch64/large-stack.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/large-stack.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@.str = private unnamed_addr constant [11 x i8] c"val = %ld\0A\00", align 1 + +; Function Attrs: noinline optnone +define dso_local void @set_large(i64 %val) #0 { +entry: + %val.addr = alloca i64, align 8 + %large = alloca [268435456 x i64], align 8 + %i = alloca i32, align 4 + store i64 %val, i64* %val.addr, align 8 + %0 = load i64, i64* %val.addr, align 8 + %arrayidx = getelementptr inbounds [268435456 x i64], [268435456 x i64]* %large, i64 0, i64 %0 + store i64 1, i64* %arrayidx, align 8 + %1 = load i64, i64* %val.addr, align 8 + %arrayidx1 = getelementptr inbounds [268435456 x i64], [268435456 x i64]* %large, i64 0, i64 %1 + %2 = load i64, 
i64* %arrayidx1, align 8 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i64 %2) + ret void +} + +declare dso_local i32 @printf(i8*, ...) + +attributes #0 = { noinline optnone "no-frame-pointer-elim"="true" } + +; CHECK: stp x[[SPILL_REG1:[0-9]+]], x[[SPILL_REG2:[0-9]+]], [sp, #-[[SPILL_OFFSET1:[0-9]+]]] +; CHECK-NEXT: str x[[SPILL_REG3:[0-9]+]], [sp, #[[SPILL_OFFSET2:[0-9]+]]] +; CHECK-NEXT: mov x[[FRAME:[0-9]+]], sp +; CHECK-COUNT-128: sub sp, sp, #[[STACK1:[0-9]+]], lsl #12 +; CHECK-NEXT: sub sp, sp, #[[STACK2:[0-9]+]], lsl #12 +; CHECK-NEXT: sub sp, sp, #[[STACK3:[0-9]+]] +; CHECK: sub x[[INDEX:[0-9]+]], x[[FRAME]], #8 +; CHECK-NEXT: str x0, [x[[INDEX]]] +; CHECK-NEXT: ldr x[[VAL1:[0-9]+]], [x[[INDEX]]] +; CHECK-NEXT: mov x[[VAL2:[0-9]+]], #8 +; CHECK-NEXT: add x[[VAL3:[0-9]+]], sp, #8 +; CHECK-NEXT: madd x[[VAL1]], x[[VAL1]], x[[VAL2]], x[[VAL3]] +; CHECK-NEXT: mov x[[TMP1:[0-9]+]], #1 +; CHECK-NEXT: str x[[TMP1]], [x[[VAL1]]] +; CHECK-NEXT: ldr x[[INDEX]], [x[[INDEX]]] +; CHECK-NEXT: mov x[[VAL4:[0-9]+]], #8 +; CHECK-NEXT: madd x[[INDEX]], x[[INDEX]], x[[VAL4]], x[[VAL3]] +; CHECK-NEXT: ldr x1, [x[[INDEX]] +; CHECK: bl printf +; CHECK-COUNT-128: add sp, sp, #[[STACK1]], lsl #12 +; CHECK-NEXT: add sp, sp, #[[STACK2]], lsl #12 +; CHECK-NEXT: add sp, sp, #[[STACK3]] +; CHECK-NEXT: ldr x[[SPILL_REG3]], [sp, #[[SPILL_OFFSET2]]] +; CHECK-NEXT: ldp x[[SPILL_REG1]], x[[SPILL_REG2]], [sp], #[[SPILL_OFFSET1]]