Index: lib/Target/X86/X86FrameLowering.h
===================================================================
--- lib/Target/X86/X86FrameLowering.h
+++ lib/Target/X86/X86FrameLowering.h
@@ -146,6 +146,11 @@
                               MachineBasicBlock::iterator MBBI, DebugLoc DL,
                               uint64_t MaxAlign) const;
 
+  /// Make small positive stack adjustments using POPs.
+  bool adjustStackWithPops(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
+                           int Offset) const;
+
   /// Adjusts the stack pointer using LEA, SUB, or ADD.
   MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -1851,6 +1851,73 @@
 #endif
 }
 
+bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           DebugLoc DL, int Offset) const {
+  bool Is64Bit = STI.is64Bit();
+  // Each POP implicitly adjusts SP by one stack slot.
+  int PopSize = Is64Bit ? 8 : 4;
+  if (Offset % PopSize)
+    return false;
+
+  int NumPops = Offset / PopSize;
+  // This is only worth it if we have at most 2 pops.
+  if (NumPops != 1 && NumPops != 2)
+    return false;
+
+  // Handle only the trivial case where the adjustment directly follows
+  // a call. This is the most common one, anyway.
+  if (MBBI == MBB.begin())
+    return false;
+  MachineBasicBlock::iterator Prev = std::prev(MBBI);
+  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
+    return false;
+
+  SmallVector<unsigned, 2> Regs;
+  const MachineOperand &RegMask = Prev->getOperand(1);
+
+  // Try to find up to NumPops free registers. POP64r takes a 64-bit
+  // register operand, POP32r a 32-bit one, so pick the class accordingly.
+  const TargetRegisterClass *RC = Is64Bit ? &X86::GR64_NOREX_NOSPRegClass
+                                          : &X86::GR32_NOREX_NOSPRegClass;
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I) {
+    // Poor man's liveness:
+    // Since we're immediately after a call, any register that is clobbered
+    // by the call and not defined by it can be considered dead.
+    if (!RegMask.clobbersPhysReg(*I))
+      continue;
+
+    bool IsDef = false;
+    for (const MachineOperand &MO : Prev->implicit_operands()) {
+      if (MO.isReg() && MO.isDef() && MO.getReg() == *I) {
+        IsDef = true;
+        break;
+      }
+    }
+
+    if (IsDef)
+      continue;
+
+    Regs.push_back(*I);
+    if (Regs.size() == (unsigned)NumPops)
+      break;
+  }
+
+  if (Regs.empty())
+    return false;
+
+  // If we found only one free register, but need two, reuse the same one
+  // twice.
+  while (Regs.size() < (unsigned)NumPops)
+    Regs.push_back(Regs.back());
+
+  for (auto Reg : Regs)
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), Reg);
+
+  return true;
+}
+
 void X86FrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -1882,8 +1949,15 @@
 
   if (Amount) {
     // Add Amount to SP to destroy a frame, and subtract to setup.
     int Offset = isDestroy ? Amount : -Amount;
-    BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+    const Function *F = MF.getFunction();
+    bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+                      F->hasFnAttribute(Attribute::MinSize);
+
+    if (!(OptForSize && adjustStackWithPops(MBB, I, DL, Offset)))
+      BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
   }
+
   return;
 }
Index: test/CodeGen/X86/fold-push.ll
===================================================================
--- test/CodeGen/X86/fold-push.ll
+++ test/CodeGen/X86/fold-push.ll
@@ -8,13 +8,13 @@
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
 ; CHECK-NEXT: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
 ; CHECK: nop
 ; NORMAL: pushl (%esp)
 ; SLM: movl (%esp), [[RELOAD:%e..]]
 ; SLM-NEXT: pushl [[RELOAD]]
 ; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
   %c = add i32 %a, %b
   call void @foo(i32 %c)
   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
@@ -27,11 +27,11 @@
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
 ; CHECK-NEXT: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
 ; CHECK: nop
 ; CHECK: pushl (%esp)
 ; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
   %c = add i32 %a, %b
   call void @foo(i32 %c)
   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
Index: test/CodeGen/X86/pop-stack-cleanup.ll
===================================================================
--- test/CodeGen/X86/pop-stack-cleanup.ll
+++ test/CodeGen/X86/pop-stack-cleanup.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
+
+declare void @param1(i32 %a)
+declare i32 @param2_ret(i32 %a, i32 %b)
+declare void @param2(i32 %a, i32 %b)
+declare void @param3(i32 %a, i32 %b, i32 %c)
+
+define void @test() optsize {
+; CHECK-LABEL: test:
+; CHECK: calll _param1
+; CHECK-NEXT: popl %eax
+; CHECK: calll _param2
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ecx
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-NEXT: pushl %eax
+; CHECK: calll _param3
+; CHECK-NEXT: addl $12, %esp
+  call void @param1(i32 1)
+  call void @param2(i32 1, i32 2)
+  %ret = call i32 @param2_ret(i32 1, i32 2)
+  call void @param3(i32 1, i32 2, i32 %ret)
+  ret void
+}
+
+define void @negative(i32 %k) {
+; CHECK-LABEL: negative:
+; CHECK: calll _param1
+; CHECK-NEXT: addl $4, %esp
+; CHECK: calll _param2
+; CHECK-NEXT: addl $8, %esp
+; CHECK: calll _param3
+; CHECK-NEXT: movl %ebp, %esp
+  %v = alloca i32, i32 %k
+  call void @param1(i32 1)
+  call void @param2(i32 1, i32 2)
+  call void @param3(i32 1, i32 2, i32 3)
+  ret void
+}
+
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) optsize {
+; CHECK-LABEL: spill:
+; CHECK-DAG: movl %ecx,
+; CHECK-DAG: movl %edx,
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: calll _spill
+  %i = call i32 @param2_ret(i32 1, i32 2)
+  call void @spill(i32 %a, i32 %b, i32 %c)
+  ret void
+}