Index: llvm/trunk/include/llvm/Target/TargetFrameLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetFrameLowering.h +++ llvm/trunk/include/llvm/Target/TargetFrameLowering.h @@ -151,6 +151,13 @@ return false; } + /// Returns true if the stack slot holes in the fixed and callee-save stack + /// area should be used when allocating other stack locations to reduce stack + /// size. + virtual bool enableStackSlotScavenging(const MachineFunction &MF) const { + return false; + } + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into the function. virtual void emitPrologue(MachineFunction &MF, Index: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp @@ -577,6 +577,108 @@ } } +/// Compute which bytes of fixed and callee-save stack area are unused and keep +/// track of them in StackBytesFree. +/// +static inline void +computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, + unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, + int64_t FixedCSEnd, BitVector &StackBytesFree) { + // Avoid undefined int64_t -> int conversion below in extreme case. + if (FixedCSEnd > std::numeric_limits<int>::max()) + return; + + StackBytesFree.resize(FixedCSEnd, true); + + SmallVector<int, 16> AllocatedFrameSlots; + // Add fixed objects. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) + AllocatedFrameSlots.push_back(i); + // Add callee-save objects. + for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) + AllocatedFrameSlots.push_back(i); + + for (int i : AllocatedFrameSlots) { + // These are converted from int64_t, but they should always fit in int + // because of the FixedCSEnd check above. 
+ int ObjOffset = MFI->getObjectOffset(i); + int ObjSize = MFI->getObjectSize(i); + int ObjStart, ObjEnd; + if (StackGrowsDown) { + // ObjOffset is negative when StackGrowsDown is true. + ObjStart = -ObjOffset - ObjSize; + ObjEnd = -ObjOffset; + } else { + ObjStart = ObjOffset; + ObjEnd = ObjOffset + ObjSize; + } + // Ignore fixed holes that are in the previous stack frame. + if (ObjEnd > 0) + StackBytesFree.reset(ObjStart, ObjEnd); + } +} + +/// Assign frame object to an unused portion of the stack in the fixed stack +/// object range. Return true if the allocation was successful. +/// +static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, unsigned MaxAlign, + BitVector &StackBytesFree) { + if (MFI->isVariableSizedObjectIndex(FrameIdx)) + return false; + + if (StackBytesFree.none()) { + // clear it to speed up later scavengeStackSlot calls to + // StackBytesFree.none() + StackBytesFree.clear(); + return false; + } + + unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx); + if (ObjAlign > MaxAlign) + return false; + + int64_t ObjSize = MFI->getObjectSize(FrameIdx); + int FreeStart; + for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; + FreeStart = StackBytesFree.find_next(FreeStart)) { + + // Check that free space has suitable alignment. + unsigned ObjStart = StackGrowsDown ? 
FreeStart + ObjSize : FreeStart; + if (alignTo(ObjStart, ObjAlign) != ObjStart) + continue; + + if (FreeStart + ObjSize > StackBytesFree.size()) + return false; + + bool AllBytesFree = true; + for (unsigned Byte = 0; Byte < ObjSize; ++Byte) + if (!StackBytesFree.test(FreeStart + Byte)) { + AllBytesFree = false; + break; + } + if (AllBytesFree) + break; + } + + if (FreeStart == -1) + return false; + + if (StackGrowsDown) { + int ObjStart = -(FreeStart + ObjSize); + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart + << "]\n"); + MFI->setObjectOffset(FrameIdx, ObjStart); + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart + << "]\n"); + MFI->setObjectOffset(FrameIdx, FreeStart); + } + + StackBytesFree.reset(FreeStart, FreeStart + ObjSize); + return true; +} + /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. static void @@ -621,9 +723,8 @@ // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. - // We currently don't support filling in holes in between fixed sized - // objects, so we adjust 'Offset' to point to the end of last fixed sized - // preallocated object. + // Adjust 'Offset' to point to the end of last fixed sized preallocated + // object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { @@ -667,6 +768,9 @@ } } + // FixedCSEnd is the stack offset to the end of the fixed and callee-save + // stack area. 
+ int64_t FixedCSEnd = Offset; unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the @@ -798,10 +902,23 @@ if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && Fn.getTarget().Options.StackSymbolOrdering) TFI.orderFrameObjects(Fn, ObjectsToAllocate); - + + // Keep track of which bytes in the fixed and callee-save range are used so we + // can use the holes when allocating later stack objects. Only do this if + // stack protector isn't being used and the target requests it and we're + // optimizing. + BitVector StackBytesFree; + if (!ObjectsToAllocate.empty() && + Fn.getTarget().getOptLevel() != CodeGenOpt::None && + MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) + computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, + FixedCSEnd, StackBytesFree); + // Now walk the objects and actually assign base offsets to them. for (auto &Object : ObjectsToAllocate) - AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); + if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign, + StackBytesFree)) + AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); // Make sure the special register scavenging spill slot is closest to the // stack pointer. 
Index: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h @@ -67,6 +67,8 @@ return true; } + bool enableStackSlotScavenging(const MachineFunction &MF) const override; + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; Index: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -942,7 +942,8 @@ // callee-save area to ensure 16-byte alignment. Offset -= 16; assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); - MFI->setObjectSize(RPI.FrameIdx, 16); + MFI->setObjectAlignment(RPI.FrameIdx, 16); + AFI->setCalleeSaveStackHasFreeSpace(true); } else Offset -= RPI.isPaired() ? 16 : 8; assert(Offset % 8 == 0); @@ -1190,3 +1191,9 @@ // instructions. AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); } + +bool AArch64FrameLowering::enableStackSlotScavenging( + const MachineFunction &MF) const { + const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + return AFI->hasCalleeSaveStackFreeSpace(); +} Index: llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -83,18 +83,24 @@ /// frame is unknown at compile time. e.g., in case of VLAs. bool StackRealigned; + /// True when the callee-save stack area has unused gaps that may be used for + /// other stack allocations. 
+ bool CalleeSaveStackHasFreeSpace; + public: AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) {} + IsSplitCSR(false), StackRealigned(false), + CalleeSaveStackHasFreeSpace(false) {} explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) { + IsSplitCSR(false), StackRealigned(false), + CalleeSaveStackHasFreeSpace(false) { (void)MF; } @@ -112,6 +118,13 @@ bool isStackRealigned() const { return StackRealigned; } void setStackRealigned(bool s) { StackRealigned = s; } + bool hasCalleeSaveStackFreeSpace() const { + return CalleeSaveStackHasFreeSpace; + } + void setCalleeSaveStackHasFreeSpace(bool s) { + CalleeSaveStackHasFreeSpace = s; + } + bool isSplitCSR() const { return IsSplitCSR; } void setIsSplitCSR(bool s) { IsSplitCSR = s; } Index: llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -674,7 +674,7 @@ define void @realign_conditional2(i1 %b) { entry: - %tmp = alloca i8, i32 4 + %tmp = alloca i8, i32 16 br i1 %b, label %bb0, label %bb1 bb0: Index: llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-hello.ll @@ -14,14 +14,12 @@ ; CHECK-NEXT: ret ; CHECK-LINUX-LABEL: main: -; CHECK-LINUX: sub sp, sp, #32 -; CHECK-LINUX-NEXT: str x30, [sp, #16] 
+; CHECK-LINUX: str x30, [sp, #-16]! ; CHECK-LINUX-NEXT: str wzr, [sp, #12] ; CHECK-LINUX: adrp x0, .L.str ; CHECK-LINUX: add x0, x0, :lo12:.L.str ; CHECK-LINUX-NEXT: bl puts -; CHECK-LINUX-NEXT: ldr x30, [sp, #16] -; CHECK-LINUX-NEXT: add sp, sp, #32 +; CHECK-LINUX-NEXT: ldr x30, [sp], #16 ; CHECK-LINUX-NEXT: ret @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"