Index: llvm/trunk/lib/Target/X86/X86FrameLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.h
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.h
@@ -146,6 +146,11 @@
                        MachineBasicBlock::iterator MBBI, DebugLoc DL,
                        uint64_t MaxAlign) const;
 
+  /// Make small positive stack adjustments using POPs.
+  bool adjustStackWithPops(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
+                           int Offset) const;
+
   /// Adjusts the stack pointer using LEA, SUB, or ADD.
   MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
@@ -1851,6 +1851,90 @@
 #endif
 }
 
+/// Try to implement a small positive stack adjustment that immediately
+/// follows a call as one or two POP instructions inserted before \p MBBI.
+///
+/// Returns true if the POPs were emitted, and false if nothing was emitted
+/// and the caller still has to adjust the stack pointer some other way.
+bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
+
+  // POPs can only raise the stack pointer, so reject zero and negative
+  // offsets (e.g. frame setup) before doing any slot arithmetic.
+  if (Offset <= 0)
+    return false;
+
+  if (Offset % SlotSize)
+    return false;
+
+  int NumPops = Offset / SlotSize;
+  // This is only worth it if we have at most 2 pops.
+  if (NumPops != 1 && NumPops != 2)
+    return false;
+
+  // Handle only the trivial case where the adjustment directly follows
+  // a call. This is the most common one, anyway.
+  if (MBBI == MBB.begin())
+    return false;
+  MachineBasicBlock::iterator Prev = std::prev(MBBI);
+  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
+    return false;
+
+  unsigned Regs[2];
+  unsigned FoundRegs = 0;
+
+  const MachineOperand &RegMask = Prev->getOperand(1);
+  const bool Is64BitTarget = STI.is64Bit();
+  const auto *RegInfo = STI.getRegisterInfo();
+
+  // POP64r needs 64-bit destination registers; POP32r needs 32-bit ones.
+  const TargetRegisterClass &RegClass =
+      Is64BitTarget ? X86::GR64_NOREX_NOSPRegClass
+                    : X86::GR32_NOREX_NOSPRegClass;
+
+  // Try to find up to NumPops free registers.
+  for (auto Candidate : RegClass) {
+
+    // Poor man's liveness:
+    // Since we're immediately after a call, any register that is clobbered
+    // by the call and not defined by it can be considered dead.
+    if (!RegMask.clobbersPhysReg(Candidate))
+      continue;
+
+    // Compare by overlap rather than equality: a call that defines only a
+    // sub- or super-register of Candidate (e.g. AX or RAX for EAX) still
+    // leaves a live value there that a POP would destroy.
+    bool IsDef = false;
+    for (const MachineOperand &MO : Prev->implicit_operands()) {
+      if (MO.isReg() && MO.isDef() &&
+          RegInfo->regsOverlap(MO.getReg(), Candidate)) {
+        IsDef = true;
+        break;
+      }
+    }
+
+    if (IsDef)
+      continue;
+
+    Regs[FoundRegs++] = Candidate;
+    if (FoundRegs == (unsigned)NumPops)
+      break;
+  }
+
+  if (FoundRegs == 0)
+    return false;
+
+  // If we found only one free register, but need two, reuse the same one twice.
+  while (FoundRegs < (unsigned)NumPops)
+    Regs[FoundRegs++] = Regs[0];
+
+  for (int i = 0; i < NumPops; ++i)
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64BitTarget ? X86::POP64r : X86::POP32r), Regs[i]);
+
+  return true;
+}
+
 void X86FrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -1882,8 +1966,12 @@
     if (Amount) {
       // Add Amount to SP to destroy a frame, and subtract to setup.
       int Offset = isDestroy ? Amount : -Amount;
-      BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+      if (!(MF.getFunction()->optForMinSize() &&
+            adjustStackWithPops(MBB, I, DL, Offset)))
+        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
     }
+
     return;
   }
 
Index: llvm/trunk/test/CodeGen/X86/fold-push.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fold-push.ll
+++ llvm/trunk/test/CodeGen/X86/fold-push.ll
@@ -27,11 +27,11 @@
 ; CHECK: movl [[EAX:%e..]], (%esp)
 ; CHECK-NEXT: pushl [[EAX]]
 ; CHECK-NEXT: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
 ; CHECK: nop
 ; CHECK: pushl (%esp)
 ; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
   %c = add i32 %a, %b
   call void @foo(i32 %c)
   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
Index: llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll
+++ llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
+
+declare void @param1(i32 %a)
+declare i32 @param2_ret(i32 %a, i32 %b)
+declare i64 @param2_ret64(i32 %a, i32 %b)
+declare void @param2(i32 %a, i32 %b)
+declare void @param3(i32 %a, i32 %b, i32 %c)
+
+define void @test() minsize {
+; CHECK-LABEL: test:
+; CHECK: calll _param1
+; CHECK-NEXT: popl %eax
+; CHECK: calll _param2
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ecx
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-NEXT: pushl %eax
+; CHECK: calll _param3
+; CHECK-NEXT: addl $12, %esp
+; CHECK: calll _param2_ret64
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %ecx
+  call void @param1(i32 1)
+  call void @param2(i32 1, i32 2)
+  %ret = call i32 @param2_ret(i32 1, i32 2)
+  call void @param3(i32 1, i32 2, i32 %ret)
+  %ret64 = call i64 @param2_ret64(i32 1, i32 2)
+  ret void
+}
+
+define void @negative(i32 %k) {
+; CHECK-LABEL: negative:
+; CHECK: calll _param1
+; CHECK-NEXT: addl $4, %esp
+; CHECK: calll _param2
+; CHECK-NEXT: addl $8, %esp
+; CHECK: calll _param3
+; CHECK-NEXT: movl %ebp, %esp
+  %v = alloca i32, i32 %k
+  call void @param1(i32 1)
+  call void @param2(i32 1, i32 2)
+  call void @param3(i32 1, i32 2, i32 3)
+  ret void
+}
+
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
+; CHECK-LABEL: spill:
+; CHECK-DAG: movl %ecx,
+; CHECK-DAG: movl %edx,
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: calll _spill
+  %i = call i32 @param2_ret(i32 1, i32 2)
+  call void @spill(i32 %a, i32 %b, i32 %c)
+  ret void
+}