diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -556,56 +556,33 @@ const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); uint64_t CurrentOffset = 0; assert(AlignOffset < StackProbeSize); - // If the offset is so small it fits within a page, there's nothing to do. - if (StackProbeSize < Offset + AlignOffset) { - - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize - AlignOffset) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) - .setMIFlag(MachineInstr::FrameSetup), - StackPtr, false, 0) - .addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - NumFrameExtraProbe++; - CurrentOffset = StackProbeSize - AlignOffset; - } - - // For the next N - 1 pages, just probe. I tried to take advantage of - // natural probes but it implies much more logic and there was very few - // interesting natural probes to interleave. + // For the first N - 1 pages, probe. + // + // We emit the most basic `movb $0, -offset(%rsp)` instruction, which is good + // for offsets of up to 2GB. This is also the most throughput- and + // space-efficient encoding that I (nagisa) could come up with. + // + // It also naturally doesn't need any special handling for precise uwtables. while (CurrentOffset + StackProbeSize < Offset) { - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- - - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) + NumFrameExtraProbe++; + CurrentOffset += StackProbeSize; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV8mi)) .setMIFlag(MachineInstr::FrameSetup), - StackPtr, false, 0) + StackPtr, false, -CurrentOffset) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); - NumFrameExtraProbe++; - CurrentOffset += StackProbeSize; } // No need to probe the tail, it is smaller than a Page. - uint64_t ChunkSize = Offset - CurrentOffset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) - .addImm(ChunkSize) + .addImm(Offset) .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -1,7 +1,64 @@ -; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo(i32 %n) local_unnamed_addr #0 { +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: pushq %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X86-64-NEXT: .cfi_offset %rbp, -16 +; CHECK-X86-64-NEXT: movq %rsp, %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa_register %rbp +; CHECK-X86-64-NEXT: movq %rsp, %rax +; CHECK-X86-64-NEXT: movl %edi, %ecx +; CHECK-X86-64-NEXT: leaq 15(,%rcx,4), %rcx +; CHECK-X86-64-NEXT: andq $-16, %rcx +; CHECK-X86-64-NEXT: subq %rcx, %rax +; CHECK-X86-64-NEXT: cmpq %rsp, %rax +; CHECK-X86-64-NEXT: 
jge .LBB0_3 +; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-64-NEXT: xorq $0, (%rsp) +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: cmpq %rsp, %rax +; CHECK-X86-64-NEXT: jl .LBB0_2 +; CHECK-X86-64-NEXT: .LBB0_3: +; CHECK-X86-64-NEXT: movq %rax, %rsp +; CHECK-X86-64-NEXT: movl $1, 4792(%rax) +; CHECK-X86-64-NEXT: movl (%rax), %eax +; CHECK-X86-64-NEXT: movq %rbp, %rsp +; CHECK-X86-64-NEXT: popq %rbp +; CHECK-X86-64-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-X86-64-NEXT: retq +; +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: pushl %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-32-NEXT: .cfi_offset %ebp, -8 +; CHECK-X86-32-NEXT: movl %esp, %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa_register %ebp +; CHECK-X86-32-NEXT: subl $8, %esp +; CHECK-X86-32-NEXT: movl 8(%ebp), %ecx +; CHECK-X86-32-NEXT: movl %esp, %eax +; CHECK-X86-32-NEXT: leal 15(,%ecx,4), %ecx +; CHECK-X86-32-NEXT: andl $-16, %ecx +; CHECK-X86-32-NEXT: subl %ecx, %eax +; CHECK-X86-32-NEXT: cmpl %esp, %eax +; CHECK-X86-32-NEXT: jge .LBB0_3 +; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-32-NEXT: xorl $0, (%esp) +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: cmpl %esp, %eax +; CHECK-X86-32-NEXT: jl .LBB0_2 +; CHECK-X86-32-NEXT: .LBB0_3: +; CHECK-X86-32-NEXT: movl %eax, %esp +; CHECK-X86-32-NEXT: movl $1, 4792(%eax) +; CHECK-X86-32-NEXT: movl (%eax), %eax +; CHECK-X86-32-NEXT: movl %ebp, %esp +; CHECK-X86-32-NEXT: popl %ebp +; CHECK-X86-32-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-X86-32-NEXT: retl %a = alloca i32, i32 %n, align 16 %b = getelementptr inbounds i32, i32* %a, i64 1198 store volatile i32 1, i32* %b diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll @@ -1,3 
+1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s @@ -7,20 +8,20 @@ define i32 @foo_noprotect() local_unnamed_addr { ; CHECK-LABEL: foo_noprotect: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000 -; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000 -; CHECK-NEXT: movl $1, 392(%rsp) -; CHECK-NEXT: movl $1, 28792(%rsp) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000 +; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000 +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl $1, 28792(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq %a = alloca i32, i64 18000, align 4096 @@ -35,43 +36,43 @@ define i32 @foo_protect() local_unnamed_addr #0 { ; CHECK-LABEL: foo_protect: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000 -; CHECK-NEXT: cmpq %rsp, %r11 -; CHECK-NEXT: je .LBB1_4 -; CHECK-NEXT:# %bb.1: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: cmpq %rsp, %r11 -; CHECK-NEXT: jb .LBB1_3 -; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: cmpq %rsp, %r11 -; 
CHECK-NEXT: jb .LBB1_2 -; CHECK-NEXT:.LBB1_3: -; CHECK-NEXT: movq %r11, %rsp -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT:.LBB1_4: -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000 -; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq %r11, %rsp -; CHECK-NEXT: jne .LBB1_5 -; CHECK-NEXT:# %bb.6: -; CHECK-NEXT: movl $1, 392(%rsp) -; CHECK-NEXT: movl $1, 28792(%rsp) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_3 +; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: movq %r11, %rsp +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000 +; CHECK-NEXT: .LBB1_5: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl $1, 28792(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll 
b/llvm/test/CodeGen/X86/stack-clash-large.ll --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,8 +1,45 @@ -; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo() local_unnamed_addr #0 { - +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: movq %rsp, %r11 +; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X86-64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: cmpq %r11, %rsp +; CHECK-X86-64-NEXT: jne .LBB0_1 +; CHECK-X86-64-NEXT: # %bb.2: +; CHECK-X86-64-NEXT: subq $2248, %rsp # imm = 0x8C8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X86-64-NEXT: movl $1, 264(%rsp) +; CHECK-X86-64-NEXT: movl $1, 28664(%rsp) +; CHECK-X86-64-NEXT: movl -128(%rsp), %eax +; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-64-NEXT: retq +; +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: movl %esp, %r11d +; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: cmpl %r11d, %esp +; CHECK-X86-32-NEXT: jne .LBB0_1 +; CHECK-X86-32-NEXT: # %bb.2: +; CHECK-X86-32-NEXT: subl $2380, %esp # imm = 0x94C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 +; CHECK-X86-32-NEXT: movl $1, 392(%esp) +; 
CHECK-X86-32-NEXT: movl $1, 28792(%esp) +; CHECK-X86-32-NEXT: movl (%esp), %eax +; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-32-NEXT: retl %a = alloca i32, i64 18000, align 16 %b0 = getelementptr inbounds i32, i32* %a, i64 98 %b1 = getelementptr inbounds i32, i32* %a, i64 7198 diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,13 +7,12 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8 +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $5880, %rsp # imm = 0x16F8 ; CHECK-NEXT: .cfi_def_cfa_offset 5888 -; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl $2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl $1, 3872(%rsp) +; CHECK-NEXT: movl $2, 672(%rsp) +; CHECK-NEXT: movl 1872(%rsp), %eax ; CHECK-NEXT: addq $5880, %rsp # imm = 0x16F8 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s @@ -5,19 +6,17 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @foo() local_unnamed_addr #0 { - ; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 -; CHECK-NEXT: .cfi_def_cfa_offset 7888 -; CHECK-NEXT: movl $1, 264(%rsp) -; CHECK-NEXT: movl $1, 4664(%rsp) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK: # %bb.0: +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $7880, %rsp # imm = 0x1EC8 +; CHECK-NEXT: .cfi_def_cfa_offset 7888 +; CHECK-NEXT: movl $1, 264(%rsp) +; CHECK-NEXT: movl $1, 4664(%rsp) +; CHECK-NEXT: movl -128(%rsp), %eax +; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll @@ -1,7 +1,29 @@ -; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo() local_unnamed_addr #0 { +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: movb $0, -4096(%rsp) +; CHECK-X86-64-NEXT: subq $7880, %rsp # imm = 0x1EC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888 +; CHECK-X86-64-NEXT: movl $1, 672(%rsp) +; CHECK-X86-64-NEXT: movl -128(%rsp), %eax +; CHECK-X86-64-NEXT: addq $7880, 
%rsp # imm = 0x1EC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-64-NEXT: retq +; +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: movb $0, -4096(%esp) +; CHECK-X86-32-NEXT: subl $8012, %esp # imm = 0x1F4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016 +; CHECK-X86-32-NEXT: movl $1, 800(%esp) +; CHECK-X86-32-NEXT: movl (%esp), %eax +; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-32-NEXT: retl %a = alloca i32, i64 2000, align 16 %b = getelementptr inbounds i32, i32* %a, i64 200 store volatile i32 1, i32* %b diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -6,20 +7,20 @@ ; | case1 | alloca + align < probe_size define i32 @foo1(i64 %i) local_unnamed_addr #0 { ; CHECK-LABEL: foo1: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: andq $-64, %rsp -; CHECK-NEXT: subq $832, %rsp # imm = 0x340 -; CHECK-NEXT: movl $1, (%rsp,%rdi,4) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-64, %rsp +; CHECK-NEXT: subq $832, %rsp # imm = 0x340 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; 
CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq %a = alloca i32, i32 200, align 64 %b = getelementptr inbounds i32, i32* %a, i64 %i @@ -31,24 +32,21 @@ ; | case2 | alloca > probe_size, align > probe_size define i32 @foo2(i64 %i) local_unnamed_addr #0 { ; CHECK-LABEL: foo2: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800 -; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 -; CHECK-NEXT: movl $1, (%rsp,%rdi,4) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800 +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $8192, %rsp # imm = 0x2000 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq %a = alloca i32, i32 2000, align 2048 %b = getelementptr inbounds i32, i32* %a, i64 %i @@ -60,22 +58,20 @@ ; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size define i32 @foo3(i64 %i) local_unnamed_addr #0 { ; CHECK-LABEL: foo3: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 
-; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 -; CHECK-NEXT: movl $1, (%rsp,%rdi,4) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq %a = alloca i32, i32 1000, align 1024 @@ -88,39 +84,39 @@ ; | case4 | alloca + probe_size < probe_size, followed by dynamic alloca define i32 @foo4(i64 %i) local_unnamed_addr #0 { ; CHECK-LABEL: foo4: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-64, %rsp -; CHECK-NEXT: subq $896, %rsp # imm = 0x380 -; CHECK-NEXT: movq %rsp, %rbx -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: movl $1, (%rbx,%rdi,4) -; CHECK-NEXT: movl (%rbx), %ecx -; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: leaq 15(,%rcx,4), %rcx -; CHECK-NEXT: andq $-16, %rcx -; CHECK-NEXT: subq %rcx, %rax -; CHECK-NEXT: cmpq %rsp, %rax -; CHECK-NEXT: jge .LBB3_3 -; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: xorq $0, (%rsp) -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: cmpq %rsp, %rax -; CHECK-NEXT: jl .LBB3_2 -; CHECK-NEXT:.LBB3_3: -; CHECK-NEXT: andq $-64, %rax -; CHECK-NEXT: movq %rax, %rsp -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: leaq -8(%rbp), %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %rbp 
-; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-64, %rsp +; CHECK-NEXT: subq $896, %rsp # imm = 0x380 +; CHECK-NEXT: movq %rsp, %rbx +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: movl $1, (%rbx,%rdi,4) +; CHECK-NEXT: movl (%rbx), %ecx +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: leaq 15(,%rcx,4), %rcx +; CHECK-NEXT: andq $-16, %rcx +; CHECK-NEXT: subq %rcx, %rax +; CHECK-NEXT: cmpq %rsp, %rax +; CHECK-NEXT: jge .LBB3_3 +; CHECK-NEXT: .LBB3_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %rax +; CHECK-NEXT: jl .LBB3_2 +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: andq $-64, %rax +; CHECK-NEXT: movq %rax, %rsp +; CHECK-NEXT: movl (%rax), %eax +; CHECK-NEXT: leaq -8(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq %a = alloca i32, i32 200, align 64 %b = getelementptr inbounds i32, i32* %a, i64 %i diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s @@ -7,19 +8,19 @@ define i32 @foo_noprotect() local_unnamed_addr { ; CHECK-LABEL: foo_noprotect: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: andq $-65536, %rsp -; CHECK-NEXT: subq $65536, %rsp -; CHECK-NEXT: movl $1, 
392(%rsp) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-65536, %rsp # imm = 0xFFFF0000 +; CHECK-NEXT: subq $65536, %rsp # imm = 0x10000 +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq @@ -33,42 +34,42 @@ define i32 @foo_protect() local_unnamed_addr #0 { ; CHECK-LABEL: foo_protect: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000 -; CHECK-NEXT: cmpq %rsp, %r11 -; CHECK-NEXT: je .LBB1_4 -; CHECK-NEXT:# %bb.1: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: cmpq %rsp, %r11 -; CHECK-NEXT: jb .LBB1_3 -; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: cmpq %rsp, %r11 -; CHECK-NEXT: jb .LBB1_2 -; CHECK-NEXT:.LBB1_3: -; CHECK-NEXT: movq %r11, %rsp -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT:.LBB1_4: -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000 -; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq %r11, %rsp -; CHECK-NEXT: jne .LBB1_5 -; CHECK-NEXT:# %bb.6: -; CHECK-NEXT: movl $1, 392(%rsp) -; CHECK-NEXT: movl (%rsp), %eax -; CHECK-NEXT: movq %rbp, %rsp -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_3 +; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: movq %r11, %rsp +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000 +; CHECK-NEXT: .LBB1_5: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-small.ll b/llvm/test/CodeGen/X86/stack-clash-small.ll --- a/llvm/test/CodeGen/X86/stack-clash-small.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s @@ -7,13 +8,13 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: subq $280, %rsp # imm = 0x118 -; CHECK-NEXT: .cfi_def_cfa_offset 288 -; CHECK-NEXT: movl $1, 264(%rsp) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $280, %rsp # imm = 0x118 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-NEXT: subq $280, %rsp # imm = 0x118 +; CHECK-NEXT: .cfi_def_cfa_offset 288 +; CHECK-NEXT: movl 
$1, 264(%rsp) +; CHECK-NEXT: movl -128(%rsp), %eax +; CHECK-NEXT: addq $280, %rsp # imm = 0x118 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq %a = alloca i32, i64 100, align 16 %b = getelementptr inbounds i32, i32* %a, i64 98 diff --git a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll --- a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll +++ b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s | FileCheck %s @@ -7,21 +8,19 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg); define void @foo() local_unnamed_addr #0 { - -;CHECK-LABEL: foo: -;CHECK: # %bb.0: -;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $8008, %rsp # imm = 0x1F48 +; CHECK-NEXT: .cfi_def_cfa_offset 8016 +; CHECK-NEXT: movq %rsp, %rdi +; CHECK-NEXT: movl $8000, %edx # imm = 0x1F40 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq memset@PLT +; CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq ; it's important that we don't use the call as a probe here -;CHECK-NEXT: movq $0, (%rsp) -;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48 -;CHECK-NEXT: .cfi_def_cfa_offset 8016 -;CHECK-NEXT: movq %rsp, %rdi -;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40 -;CHECK-NEXT: xorl %esi, %esi -;CHECK-NEXT: callq memset -;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48 -;CHECK-NEXT: .cfi_def_cfa_offset 8 -;CHECK-NEXT: retq %a = alloca i8, i64 8000, align 16 call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)