Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -82,6 +82,16 @@
   }
 }
 
+/// Return X86::SUB64rr when IsLP64 is set, X86::SUB32rr otherwise.
+static unsigned getSUBrrOpcode(bool IsLP64) {
+  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
+}
+
+/// Return X86::ADD64rr when IsLP64 is set, X86::ADD32rr otherwise.
+static unsigned getADDrrOpcode(bool IsLP64) {
+  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
+}
+
 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
   if (IsLP64) {
     if (isInt<8>(Imm))
@@ -187,6 +197,32 @@
   DebugLoc DL = MBB.findDebugLoc(MBBI);
 
   while (Offset) {
+    if (Offset > Chunk) {
+      // Rather than emit a long series of instructions for large offsets,
+      // load the offset into a register and do one sub/add.
+      // NOTE(review): the isSub path takes RAX/EAX without a liveness check,
+      // while the add path asks for a dead caller-saved register; confirm
+      // RAX/EAX can never be live-in where a large isSub update is emitted.
+      unsigned Reg = isSub
+        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
+        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+      // When Reg is zero, fall through to the chunked updates below.
+      if (Reg) {
+        Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
+          .addImm(Offset);
+        Opc = isSub
+          ? getSUBrrOpcode(Is64BitTarget)
+          : getADDrrOpcode(Is64BitTarget);
+        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+          .addReg(StackPtr)
+          .addReg(Reg);
+        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+        Offset = 0;
+        continue;
+      }
+    }
+
     uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
     if (ThisVal == (Is64BitTarget ? 8 : 4)) {
       // Use push / pop instead.
Index: test/CodeGen/X86/huge-stack-offset.ll
===================================================================
--- test/CodeGen/X86/huge-stack-offset.ll
+++ test/CodeGen/X86/huge-stack-offset.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Test that a large stack offset uses a single add/sub instruction to
+; adjust the stack pointer.
+
+define void @foo() nounwind {
+; CHECK-LABEL: foo:
+; CHECK: movabsq $5000000000000008, %rax
+; CHECK-NEXT: subq %rax, %rsp
+; CHECK-NOT: subq $2147483647, %rsp
+; CHECK: movabsq $5000000000000008, [[RAX:%r..]]
+; CHECK-NEXT: addq [[RAX]], %rsp
+  %1 = alloca [5000000000000000 x i8], align 16
+  %2 = getelementptr inbounds [5000000000000000 x i8]* %1, i32 0, i32 0
+  call void @bar(i8* %2)
+  ret void
+}
+
+; Verify that we do not clobber the return value.
+
+define i32 @foo2() nounwind {
+; CHECK-LABEL: foo2:
+; CHECK: movabsq $5000000000000008, %rax
+; CHECK-NEXT: subq %rax, %rsp
+; CHECK-NOT: subq $2147483647, %rsp
+; CHECK: movl $10, %eax
+; CHECK-NOT: movabsq $5000000000000008, %rax
+  %1 = alloca [5000000000000000 x i8], align 16
+  %2 = getelementptr inbounds [5000000000000000 x i8]* %1, i32 0, i32 0
+  call void @bar(i8* %2)
+  ret i32 10
+}
+
+declare void @bar(i8*)