diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -556,56 +556,43 @@ const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); uint64_t CurrentOffset = 0; assert(AlignOffset < StackProbeSize); - // If the offset is so small it fits within a page, there's nothing to do. if (StackProbeSize < Offset + AlignOffset) { - - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize - AlignOffset) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) + NumFrameExtraProbe++; + CurrentOffset = StackProbeSize - AlignOffset; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV8mi)) .setMIFlag(MachineInstr::FrameSetup), - StackPtr, false, 0) + StackPtr, false, -CurrentOffset) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); - NumFrameExtraProbe++; - CurrentOffset = StackProbeSize - AlignOffset; } - // For the next N - 1 pages, just probe. I tried to take advantage of - // natural probes but it implies much more logic and there was very few - // interesting natural probes to interleave. + // For the remaining N - 1 pages, probe. + // + // We emit the most basic `movb $0, -offset(%rsp)` instruction which is good + // for offsets of up to 2GB. This is also the most throughput- and space-efficient + // encoding that I (nagisa) could come up with. + // + // It also naturally doesn't need any special handling for precise uwtables. 
while (CurrentOffset + StackProbeSize < Offset) { - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - - - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) + NumFrameExtraProbe++; + CurrentOffset += StackProbeSize; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV8mi)) .setMIFlag(MachineInstr::FrameSetup), - StackPtr, false, 0) + StackPtr, false, -CurrentOffset) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); - NumFrameExtraProbe++; - CurrentOffset += StackProbeSize; } // No need to probe the tail, it is smaller than a Page. - uint64_t ChunkSize = Offset - CurrentOffset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) - .addImm(ChunkSize) + .addImm(Offset) .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -7,9 +7,8 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8 +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $5880, %rsp # imm = 0x16F8 ; CHECK-NEXT: .cfi_def_cfa_offset 5888 ; CHECK-NEXT: movl $1, 3872(%rsp) ; CHECK-NEXT: movl $2, 672(%rsp) diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll @@ 
-7,9 +7,8 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $7880, %rsp # imm = 0x1EC8 ; CHECK-NEXT: .cfi_def_cfa_offset 7888 ; CHECK-NEXT: movl $1, 264(%rsp) ; CHECK-NEXT: movl $1, 4664(%rsp) diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll @@ -5,9 +5,8 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X86-64-LABEL: foo: ; CHECK-X86-64: # %bb.0: -; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-X86-64-NEXT: movq $0, (%rsp) -; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8 +; CHECK-X86-64-NEXT: movb $0, -4096(%rsp) +; CHECK-X86-64-NEXT: subq $7880, %rsp # imm = 0x1EC8 ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888 ; CHECK-X86-64-NEXT: movl $1, 672(%rsp) ; CHECK-X86-64-NEXT: movl -128(%rsp), %eax @@ -17,9 +16,8 @@ ; ; CHECK-X86-32-LABEL: foo: ; CHECK-X86-32: # %bb.0: -; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 -; CHECK-X86-32-NEXT: movl $0, (%esp) -; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C +; CHECK-X86-32-NEXT: movb $0, -4096(%esp) +; CHECK-X86-32-NEXT: subl $8012, %esp # imm = 0x1F4C ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016 ; CHECK-X86-32-NEXT: movl $1, 800(%esp) ; CHECK-X86-32-NEXT: movl (%esp), %eax diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -38,11 +38,9 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800 -; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 -; 
CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 +; CHECK-NEXT: movb $0, -2048(%rsp) +; CHECK-NEXT: movb $0, -6144(%rsp) +; CHECK-NEXT: subq $8192, %rsp # imm = 0x2000 ; CHECK-NEXT: movl $1, (%rsp,%rdi,4) ; CHECK-NEXT: movl (%rsp), %eax ; CHECK-NEXT: movq %rbp, %rsp @@ -66,9 +64,8 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 +; CHECK-NEXT: movb $0, -3072(%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: movl $1, (%rsp,%rdi,4) ; CHECK-NEXT: movl (%rsp), %eax ; CHECK-NEXT: movq %rbp, %rsp diff --git a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll --- a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll +++ b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll @@ -9,9 +9,8 @@ define void @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3912, %rsp # imm = 0xF48 +; CHECK-NEXT: movb $0, -4096(%rsp) +; CHECK-NEXT: subq $8008, %rsp # imm = 0x1F48 ; CHECK-NEXT: .cfi_def_cfa_offset 8016 ; CHECK-NEXT: movq %rsp, %rdi ; CHECK-NEXT: movl $8000, %edx # imm = 0x1F40