Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -151,6 +151,13 @@ return false; } + /// Returns true if the stack slot holes in the fixed and callee-save stack + /// area should be used when allocating other stack locations to reduce stack + /// size. + virtual bool enableStackSlotScavenging(const MachineFunction &MF) const { + return false; + } + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF, Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -526,6 +526,68 @@ } } +/// Assign frame object to an unused portion of the stack in the fixed stack +/// object range. Return true if the allocation was successful. +/// +static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, unsigned MaxAlign, + BitVector &StackBytesFree) { + if (StackBytesFree.empty()) + return false; + + if (MFI->isVariableSizedObjectIndex(FrameIdx)) + return false; + + if (StackBytesFree.all()) { + StackBytesFree.resize(0); + return false; + } + + unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx); + if (ObjAlign > MaxAlign) + return false; + + auto ObjSize = MFI->getObjectSize(FrameIdx); + int FreeStart; + for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; + FreeStart = StackBytesFree.find_next(FreeStart)) { + + // Check that free space has suitable alignment. + unsigned ObjStart = StackGrowsDown ? 
FreeStart + ObjSize : FreeStart; + if (alignTo(ObjStart, ObjAlign) != ObjStart) + continue; + + if (FreeStart + ObjSize > StackBytesFree.size()) + return false; + + bool AllBytesFree = true; + for (unsigned Byte = 0; Byte < ObjSize; ++Byte) + if (!StackBytesFree.test(FreeStart + Byte)) { + AllBytesFree = false; + break; + } + if (AllBytesFree) + break; + } + + if (FreeStart == -1) + return false; + + if (StackGrowsDown) { + int ObjStart = -(FreeStart + ObjSize); + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" + << ObjStart << "]\n"); + MFI->setObjectOffset(FrameIdx, ObjStart); + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" + << FreeStart << "]\n"); + MFI->setObjectOffset(FrameIdx, FreeStart); + } + + StackBytesFree.reset(FreeStart, FreeStart + ObjSize); + return true; +} + /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. static void @@ -570,9 +632,8 @@ // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. - // We currently don't support filling in holes in between fixed sized - // objects, so we adjust 'Offset' to point to the end of last fixed sized - // preallocated object. + // Adjust 'Offset' to point to the end of last fixed sized preallocated + // object. 
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { @@ -616,6 +677,7 @@ } } + int64_t FixedCSOffset = Offset; unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the @@ -747,10 +809,45 @@ if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && Fn.getTarget().Options.StackSymbolOrdering) TFI.orderFrameObjects(Fn, ObjectsToAllocate); - + + // Keep track of which bytes in the fixed and callee-save range are used so we + // can use the holes when allocating later stack objects. Only do this if + // stack protector isn't being used and the target requests it. + BitVector StackBytesFree; + if (!ObjectsToAllocate.empty() && + MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) { + StackBytesFree.resize(FixedCSOffset, true); + + SmallVector<int, 16> AllocedFrameSlots; + // Fixed objects + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) + AllocedFrameSlots.push_back(i); + // Callee-save objects + for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) + AllocedFrameSlots.push_back(i); + + for (int i : AllocedFrameSlots) { + auto ObjOffset = MFI->getObjectOffset(i); + auto ObjSize = MFI->getObjectSize(i); + int ObjStart, ObjEnd; + if (StackGrowsDown) { + ObjStart = -ObjOffset - ObjSize; + ObjEnd = -ObjOffset; + } else { + ObjStart = ObjOffset; + ObjEnd = ObjOffset + ObjSize; + } + // Ignore fixed holes that are in the previous stack frame. + if (ObjEnd > 0) + StackBytesFree.reset(ObjStart, ObjEnd); + } + } + // Now walk the objects and actually assign base offsets to them. for (auto &Object : ObjectsToAllocate) + if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign, + StackBytesFree)) + AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); - AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); // Make sure the special register scavenging spill slot is closest to the // stack pointer. 
Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -67,6 +67,8 @@ return true; } + bool enableStackSlotScavenging(const MachineFunction &MF) const override; + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -942,7 +942,8 @@ // callee-save area to ensure 16-byte alignment. Offset -= 16; assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); - MFI->setObjectSize(RPI.FrameIdx, 16); + MFI->setObjectAlignment(RPI.FrameIdx, 16); + AFI->setCalleeSaveStackHasFreeSpace(true); } else Offset -= RPI.isPaired() ? 16 : 8; assert(Offset % 8 == 0); @@ -1188,3 +1189,9 @@ // instructions. AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); } + +bool AArch64FrameLowering::enableStackSlotScavenging( + const MachineFunction &MF) const { + const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + return AFI->hasCalleeSaveStackFreeSpace(); +} Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h =================================================================== --- lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -83,18 +83,24 @@ /// frame is unknown at compile time. e.g., in case of VLAs. bool StackRealigned; + /// True when the callee-save stack area has unused gaps that may be used for + /// other stack allocations. 
+ bool CalleeSaveStackHasFreeSpace; + public: AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) {} + IsSplitCSR(false), StackRealigned(false), + CalleeSaveStackHasFreeSpace(false) {} explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) { + IsSplitCSR(false), StackRealigned(false), + CalleeSaveStackHasFreeSpace(false) { (void)MF; } @@ -112,6 +118,13 @@ bool isStackRealigned() const { return StackRealigned; } void setStackRealigned(bool s) { StackRealigned = s; } + bool hasCalleeSaveStackFreeSpace() const { + return CalleeSaveStackHasFreeSpace; + } + void setCalleeSaveStackHasFreeSpace(bool s) { + CalleeSaveStackHasFreeSpace = s; + } + bool isSplitCSR() const { return IsSplitCSR; } void setIsSplitCSR(bool s) { IsSplitCSR = s; } Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -678,7 +678,7 @@ br i1 %b, label %bb0, label %bb1 bb0: - %MyAlloca = alloca i8, i64 64, align 32 + %MyAlloca = alloca i8, i64 64, align 64 br label %bb1 bb1: @@ -689,11 +689,11 @@ ; Extra realignment in the prologue (performance issue). ; CHECK: tbz {{.*}} .[[LABEL:.*]] ; CHECK: sub x9, sp, #32 // =32 -; CHECK: and sp, x9, #0xffffffffffffffe0 +; CHECK: and sp, x9, #0xffffffffffffffc0 ; CHECK: mov x19, sp ; Stack is realigned in a non-entry BB. 
; CHECK: sub [[REG:x[01-9]+]], sp, #64 -; CHECK: and sp, [[REG]], #0xffffffffffffffe0 +; CHECK: and sp, [[REG]], #0xffffffffffffffc0 ; CHECK: .[[LABEL]]: ; CHECK: ret Index: test/CodeGen/AArch64/arm64-hello.ll =================================================================== --- test/CodeGen/AArch64/arm64-hello.ll +++ test/CodeGen/AArch64/arm64-hello.ll @@ -14,14 +14,12 @@ ; CHECK-NEXT: ret ; CHECK-LINUX-LABEL: main: -; CHECK-LINUX: sub sp, sp, #32 -; CHECK-LINUX-NEXT: str x30, [sp, #16] +; CHECK-LINUX: str x30, [sp, #-16]! ; CHECK-LINUX-NEXT: str wzr, [sp, #12] ; CHECK-LINUX: adrp x0, .L.str ; CHECK-LINUX: add x0, x0, :lo12:.L.str ; CHECK-LINUX-NEXT: bl puts -; CHECK-LINUX-NEXT: ldr x30, [sp, #16] -; CHECK-LINUX-NEXT: add sp, sp, #32 +; CHECK-LINUX-NEXT: ldr x30, [sp], #16 ; CHECK-LINUX-NEXT: ret @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"