diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -647,6 +647,8 @@ uint64_t AlignOffset) const { assert(Offset && "null offset"); + const bool NeedsDwarfCFI = needsDwarfCFI(MF); + const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; @@ -686,17 +688,36 @@ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : Is64Bit ? X86::R11D : X86::EAX; + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); // save loop bound { - const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); + const uint64_t BoundOffset = alignDown(Offset, StackProbeSize); + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset); BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) .addReg(FinalStackProbed) - .addImm(Offset / StackProbeSize * StackProbeSize) + .addImm(BoundOffset) .setMIFlag(MachineInstr::FrameSetup); + + // while in the loop, use loop-invariant reg for CFI, + // instead of the stack pointer, which changes during the loop + if (!HasFP && NeedsDwarfCFI) { + // x32 uses the same DWARF register numbers as x86-64, + // so there isn't a register number for r11d, we must use r11 instead + const Register DwarfFinalStackProbed = + STI.isTarget64BitILP32() + ? 
Register(getX86SubSuperRegister(FinalStackProbed, 64)) + : FinalStackProbed; + + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true))); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset)); + } } // allocate a page @@ -735,15 +756,30 @@ MBB.addSuccessor(testMBB); // handle tail - unsigned TailOffset = Offset % StackProbeSize; + const unsigned TailOffset = Offset % StackProbeSize; + MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); if (TailOffset) { const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); - BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) + BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(TailOffset) .setMIFlag(MachineInstr::FrameSetup); } + // after the loop, switch back to stack pointer for CFI + if (!HasFP && NeedsDwarfCFI) { + // x32 uses the same DWARF register numbers as x86-64, + // so there isn't a register number for esp, we must use rsp instead + const Register DwarfStackPtr = + STI.isTarget64BitILP32() + ? 
Register(getX86SubSuperRegister(StackPtr, 64)) + : Register(StackPtr); + + BuildCFI(*tailMBB, TailMBBIter, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true))); + } + // Update Live In information recomputeLiveIns(*testMBB); recomputeLiveIns(*tailMBB); diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s ; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s ; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s @@ -7,6 +8,8 @@ ; CHECK-X64: # %bb.0: ; CHECK-X64-NEXT: movq %rsp, %r11 ; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X64-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632 ; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X64-NEXT: movq $0, (%rsp) @@ -14,6 +17,7 @@ ; CHECK-X64-NEXT: jne .LBB0_1 ; CHECK-X64-NEXT: # %bb.2: ; CHECK-X64-NEXT: subq $2248, %rsp # imm = 0x8C8 +; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp ; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X64-NEXT: movl $1, 264(%rsp) ; CHECK-X64-NEXT: movl $1, 28664(%rsp) @@ -26,6 +30,8 @@ ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: movl %esp, %eax ; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000 +; CHECK-X86-NEXT: .cfi_def_cfa_register %eax +; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632 ; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-NEXT: movl $0, (%esp) @@ -33,6 +39,7 @@ ; CHECK-X86-NEXT: jne .LBB0_1 ; CHECK-X86-NEXT: # %bb.2: ; CHECK-X86-NEXT: subl 
$2380, %esp # imm = 0x94C +; CHECK-X86-NEXT: .cfi_def_cfa_register %esp ; CHECK-X86-NEXT: .cfi_def_cfa_offset 72016 ; CHECK-X86-NEXT: movl $1, 392(%esp) ; CHECK-X86-NEXT: movl $1, 28792(%esp) @@ -45,6 +52,8 @@ ; CHECK-X32: # %bb.0: ; CHECK-X32-NEXT: movl %esp, %r11d ; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X32-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632 ; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X32-NEXT: movq $0, (%esp) @@ -52,6 +61,7 @@ ; CHECK-X32-NEXT: jne .LBB0_1 ; CHECK-X32-NEXT: # %bb.2: ; CHECK-X32-NEXT: subl $2248, %esp # imm = 0x8C8 +; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp ; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X32-NEXT: movl $1, 264(%esp) ; CHECK-X32-NEXT: movl $1, 28664(%esp) @@ -68,4 +78,139 @@ ret i32 %c } +define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) "probe-stack"="inline-asm" "no_caller_saved_registers" { +; CHECK-X64-LABEL: push_before_probe: +; CHECK-X64: # %bb.0: +; CHECK-X64-NEXT: pushq %rax +; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X64-NEXT: movq %rsp, %r11 +; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X64-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632 +; CHECK-X64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X64-NEXT: movq $0, (%rsp) +; CHECK-X64-NEXT: cmpq %r11, %rsp +; CHECK-X64-NEXT: jne .LBB1_1 +; CHECK-X64-NEXT: # %bb.2: +; CHECK-X64-NEXT: subq $2240, %rsp # imm = 0x8C0 +; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X64-NEXT: .cfi_offset %rax, -16 +; CHECK-X64-NEXT: movl 71888(%rsp), %eax +; CHECK-X64-NEXT: addl %esi, %edi +; CHECK-X64-NEXT: addl %ecx, %edx +; CHECK-X64-NEXT: addl %edi, %edx +; CHECK-X64-NEXT: addl %r9d, %r8d +; CHECK-X64-NEXT: addl 71896(%rsp), 
%eax +; CHECK-X64-NEXT: addl %r8d, %eax +; CHECK-X64-NEXT: addl %edx, %eax +; CHECK-X64-NEXT: movl %eax, 264(%rsp) +; CHECK-X64-NEXT: movl %eax, 28664(%rsp) +; CHECK-X64-NEXT: addq $71872, %rsp # imm = 0x118C0 +; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X64-NEXT: popq %rax +; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X64-NEXT: retq +; +; CHECK-X86-LABEL: push_before_probe: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-NEXT: pushl %edx +; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-X86-NEXT: pushl %ecx +; CHECK-X86-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: .cfi_def_cfa_offset 20 +; CHECK-X86-NEXT: movl %esp, %eax +; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000 +; CHECK-X86-NEXT: .cfi_def_cfa_register %eax +; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632 +; CHECK-X86-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-NEXT: movl $0, (%esp) +; CHECK-X86-NEXT: cmpl %eax, %esp +; CHECK-X86-NEXT: jne .LBB1_1 +; CHECK-X86-NEXT: # %bb.2: +; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C +; CHECK-X86-NEXT: .cfi_def_cfa_register %esp +; CHECK-X86-NEXT: .cfi_def_cfa_offset 72032 +; CHECK-X86-NEXT: .cfi_offset %eax, -20 +; CHECK-X86-NEXT: .cfi_offset %ecx, -16 +; CHECK-X86-NEXT: .cfi_offset %edx, -12 +; CHECK-X86-NEXT: .cfi_offset %esi, -8 +; CHECK-X86-NEXT: movl 72056(%esp), %eax +; CHECK-X86-NEXT: movl 72048(%esp), %edx +; CHECK-X86-NEXT: movl 72040(%esp), %ecx +; CHECK-X86-NEXT: movl 72032(%esp), %esi +; CHECK-X86-NEXT: addl 72036(%esp), %esi +; CHECK-X86-NEXT: addl 72044(%esp), %ecx +; CHECK-X86-NEXT: addl %esi, %ecx +; CHECK-X86-NEXT: addl 72052(%esp), %edx +; CHECK-X86-NEXT: addl 72060(%esp), %eax +; CHECK-X86-NEXT: addl %edx, %eax +; CHECK-X86-NEXT: addl %ecx, %eax +; CHECK-X86-NEXT: movl %eax, 392(%esp) +; CHECK-X86-NEXT: movl %eax, 28792(%esp) +; CHECK-X86-NEXT: addl $72012, %esp # imm = 
0x1194C +; CHECK-X86-NEXT: .cfi_def_cfa_offset 20 +; CHECK-X86-NEXT: popl %eax +; CHECK-X86-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X86-NEXT: popl %ecx +; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-X86-NEXT: popl %edx +; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-NEXT: retl +; +; CHECK-X32-LABEL: push_before_probe: +; CHECK-X32: # %bb.0: +; CHECK-X32-NEXT: pushq %rax +; CHECK-X32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X32-NEXT: movl %esp, %r11d +; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X32-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632 +; CHECK-X32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X32-NEXT: movq $0, (%esp) +; CHECK-X32-NEXT: cmpl %r11d, %esp +; CHECK-X32-NEXT: jne .LBB1_1 +; CHECK-X32-NEXT: # %bb.2: +; CHECK-X32-NEXT: subl $2240, %esp # imm = 0x8C0 +; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X32-NEXT: .cfi_offset %rax, -16 +; CHECK-X32-NEXT: movl 71888(%esp), %eax +; CHECK-X32-NEXT: addl %esi, %edi +; CHECK-X32-NEXT: addl %ecx, %edx +; CHECK-X32-NEXT: addl %edi, %edx +; CHECK-X32-NEXT: addl %r9d, %r8d +; CHECK-X32-NEXT: addl 71896(%esp), %eax +; CHECK-X32-NEXT: addl %r8d, %eax +; CHECK-X32-NEXT: addl %edx, %eax +; CHECK-X32-NEXT: movl %eax, 264(%esp) +; CHECK-X32-NEXT: movl %eax, 28664(%esp) +; CHECK-X32-NEXT: addl $71872, %esp # imm = 0x118C0 +; CHECK-X32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X32-NEXT: popq %rax +; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X32-NEXT: retq + %all = alloca i32, i64 18000, align 16 + %b0 = getelementptr inbounds i32, i32* %all, i64 98 + %b1 = getelementptr inbounds i32, i32* %all, i64 7198 + %ab = add i32 %a, %b + %cd = add i32 %c, %d + %ef = add i32 %e, %f + %gh = add i32 %g, %h + %abcd = add i32 %ab, %cd + %efgh = add i32 %ef, %gh + %sum = add i32 %abcd, %efgh + 
store volatile i32 %sum, i32* %b0 + store volatile i32 %sum, i32* %b1 + ret void +} + attributes #0 = {"probe-stack"="inline-asm"}