diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -192,6 +192,10 @@
   bool has128ByteRedZone(const MachineFunction& MF) const;
 
 private:
+  bool isWin64Prologue(const MachineFunction &MF) const;
+
+  bool needsDwarfCFI(const MachineFunction &MF) const;
+
   uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
 
   /// Emit target stack probe as a call to a helper function
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -553,6 +553,8 @@
     MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
     uint64_t AlignOffset) const {
 
+  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
+  const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
   const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
@@ -572,6 +574,12 @@
                              .setMIFlag(MachineInstr::FrameSetup);
       MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 
+      if (!HasFP && NeedsDwarfCFI) {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createAdjustCfaOffset(
+                     nullptr, StackProbeSize - AlignOffset));
+      }
+
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                        .setMIFlag(MachineInstr::FrameSetup),
                    StackPtr, false, 0)
@@ -591,6 +599,11 @@
                            .setMIFlag(MachineInstr::FrameSetup);
     MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 
+    if (!HasFP && NeedsDwarfCFI) {
+      BuildCFI(
+          MBB, MBBI, DL,
+          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
+    }
     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                      .setMIFlag(MachineInstr::FrameSetup),
@@ -607,6 +620,10 @@
                          .addReg(StackPtr)
                          .addImm(ChunkSize)
                          .setMIFlag(MachineInstr::FrameSetup);
+  if (!HasFP && NeedsDwarfCFI) {
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createAdjustCfaOffset(nullptr, ChunkSize));
+  }
   MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 }
@@ -616,6 +633,8 @@
     uint64_t AlignOffset) const {
   assert(Offset && "null offset");
 
+  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
+  const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
@@ -652,25 +671,30 @@
   MF.insert(MBBIter, testMBB);
   MF.insert(MBBIter, tailMBB);
 
-  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
-  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+  Register StackProbeIterator = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), StackProbeIterator)
       .addReg(StackPtr)
       .setMIFlag(MachineInstr::FrameSetup);
 
   // save loop bound
   {
+    const unsigned BoundOffset = Offset / StackProbeSize * StackProbeSize;
     const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
-        .addReg(FinalStackProbed)
-        .addImm(Offset / StackProbeSize * StackProbeSize)
+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(BoundOffset)
         .setMIFlag(MachineInstr::FrameSetup);
+    if (!HasFP && NeedsDwarfCFI) {
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
+    }
   }
 
   // allocate a page
   {
     const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-    BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
-        .addReg(StackPtr)
+    BuildMI(testMBB, DL, TII.get(SUBOpc), StackProbeIterator)
+        .addReg(StackProbeIterator)
         .addImm(StackProbeSize)
         .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -678,14 +702,14 @@
   // touch the page
   addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                    .setMIFlag(MachineInstr::FrameSetup),
-               StackPtr, false, 0)
+               StackProbeIterator, false, 0)
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
 
   // cmp with stack pointer bound
   BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+      .addReg(StackProbeIterator)
       .addReg(StackPtr)
-      .addReg(FinalStackProbed)
       .setMIFlag(MachineInstr::FrameSetup);
 
   // jump
@@ -705,10 +729,16 @@
   unsigned TailOffset = Offset % StackProbeSize;
   if (TailOffset) {
     const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
-    BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+    MachineBasicBlock::iterator tailMBBIter = tailMBB->begin();
+    BuildMI(*tailMBB, tailMBBIter, DL, TII.get(Opc), StackPtr)
         .addReg(StackPtr)
         .addImm(TailOffset)
         .setMIFlag(MachineInstr::FrameSetup);
+
+    if (!HasFP && NeedsDwarfCFI) {
+      BuildCFI(*tailMBB, tailMBBIter, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, TailOffset));
+    }
   }
 
   // Update Live In information
@@ -1200,6 +1230,13 @@
   return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
 }
 
+bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
+  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
+bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
+  return !isWin64Prologue(MF) && MF.needsFrameMoves();
+}
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
@@ -1305,13 +1342,13 @@
       MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
   bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
   bool HasFP = hasFP(MF);
-  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+  bool IsWin64Prologue = isWin64Prologue(MF);
   bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
   // FIXME: Emit FPO data for EH funclets.
   bool NeedsWinFPO =
       !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
   bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
-  bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
+  bool NeedsDwarfCFI = needsDwarfCFI(MF);
   Register FramePtr = TRI->getFrameRegister(MF);
   const Register MachineFramePtr = STI.isTarget64BitILP32()
diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
--- a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
@@ -58,11 +58,11 @@
 ; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT:.LBB1_4:
 ; CHECK-NEXT: movq %rsp, %r11
-; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000
+; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000
 ; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq %r11, %rsp
+; CHECK-NEXT: subq $4096, %r11 # imm = 0x1000
+; CHECK-NEXT: movq $0, (%r11)
+; CHECK-NEXT: cmpq %rsp, %r11
 ; CHECK-NEXT: jne .LBB1_5
 ; CHECK-NEXT:# %bb.6:
 ; CHECK-NEXT: movl $1, 392(%rsp)
diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll
--- a/llvm/test/CodeGen/X86/stack-clash-large.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large.ll
@@ -17,14 +17,16 @@
 ; CHECK-X86-64-LABEL: foo:
 ; CHECK-X86-64: # %bb.0:
 ; CHECK-X86-64-NEXT: movq %rsp, %r11
-; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000
+; CHECK-X86-64-NEXT: subq $69632, %rsp # imm = 0x11000
+; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 69632
 ; CHECK-X86-64-NEXT: .LBB0_1:
-; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-X86-64-NEXT: movq $0, (%rsp)
-; CHECK-X86-64-NEXT: cmpq %r11, %rsp
+; CHECK-X86-64-NEXT: subq $4096, %r11 # imm = 0x1000
+; CHECK-X86-64-NEXT: movq $0, (%r11)
+; CHECK-X86-64-NEXT: cmpq %rsp, %r11
 ; CHECK-X86-64-NEXT: jne .LBB0_1
 ; CHECK-X86-64-NEXT:# %bb.2:
 ; CHECK-X86-64-NEXT: subq $2248, %rsp
+; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 2248
 ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888
 ; CHECK-X86-64-NEXT: movl $1, 264(%rsp)
 ; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)
@@ -36,14 +38,16 @@
 ; CHECK-X86-32-LABEL: foo:
 ; CHECK-X86-32: # %bb.0:
 ; CHECK-X86-32-NEXT: movl %esp, %r11d
-; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000
+; CHECK-X86-32-NEXT: subl $69632, %esp # imm = 0x11000
+; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 69632
 ; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
-; CHECK-X86-32-NEXT: movl $0, (%esp)
-; CHECK-X86-32-NEXT: cmpl %r11d, %esp
+; CHECK-X86-32-NEXT: subl $4096, %r11d # imm = 0x1000
+; CHECK-X86-32-NEXT: movl $0, (%r11d)
+; CHECK-X86-32-NEXT: cmpl %esp, %r11d
 ; CHECK-X86-32-NEXT: jne .LBB0_1
 ; CHECK-X86-32-NEXT:# %bb.2:
 ; CHECK-X86-32-NEXT: subl $2380, %esp
+; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 2380
 ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016
 ; CHECK-X86-32-NEXT: movl $1, 392(%esp)
 ; CHECK-X86-32-NEXT: movl $1, 28792(%esp)
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll
--- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll
@@ -8,8 +8,10 @@
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: .cfi_adjust_cfa_offset 4096
 ; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8
+; CHECK-NEXT: .cfi_adjust_cfa_offset 1784
 ; CHECK-NEXT: .cfi_def_cfa_offset 5888
 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: movl $2, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
--- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
@@ -9,8 +9,10 @@
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: .cfi_adjust_cfa_offset 4096
 ; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-NEXT: .cfi_adjust_cfa_offset 3784
 ; CHECK-NEXT: .cfi_def_cfa_offset 7888
 ; CHECK-NEXT: movl $1, 264(%rsp)
 ; CHECK-NEXT: movl $1, 4664(%rsp)
diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll
--- a/llvm/test/CodeGen/X86/stack-clash-medium.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll
@@ -14,8 +14,10 @@
 ; CHECK-X86-64-LABEL: foo:
 ; CHECK-X86-64: # %bb.0:
 ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 4096
 ; CHECK-X86-64-NEXT: movq $0, (%rsp)
 ; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 3784
 ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888
 ; CHECK-X86-64-NEXT: movl $1, 672(%rsp)
 ; CHECK-X86-64-NEXT: movl -128(%rsp), %eax
@@ -27,8 +29,10 @@
 ; CHECK-X86-32-LABEL: foo:
 ; CHECK-X86-32: # %bb.0:
 ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 4096
 ; CHECK-X86-32-NEXT: movl $0, (%esp)
 ; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C
+; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 3916
 ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016
 ; CHECK-X86-32-NEXT: movl $1, 800(%esp)
 ; CHECK-X86-32-NEXT: movl (%esp), %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
--- a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
@@ -56,11 +56,11 @@
 ; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT:.LBB1_4:
 ; CHECK-NEXT: movq %rsp, %r11
-; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000
+; CHECK-NEXT: subq $65536, %rsp # imm = 0x10000
 ; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-NEXT: movq $0, (%rsp)
-; CHECK-NEXT: cmpq %r11, %rsp
+; CHECK-NEXT: subq $4096, %r11 # imm = 0x1000
+; CHECK-NEXT: movq $0, (%r11)
+; CHECK-NEXT: cmpq %rsp, %r11
 ; CHECK-NEXT: jne .LBB1_5
 ; CHECK-NEXT:# %bb.6:
 ; CHECK-NEXT: movl $1, 392(%rsp)
diff --git a/llvm/test/CodeGen/X86/stack-clash-small.ll b/llvm/test/CodeGen/X86/stack-clash-small.ll
--- a/llvm/test/CodeGen/X86/stack-clash-small.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-small.ll
@@ -8,6 +8,7 @@
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: subq $280, %rsp # imm = 0x118
+; CHECK-NEXT: .cfi_adjust_cfa_offset 280
 ; CHECK-NEXT: .cfi_def_cfa_offset 288
 ; CHECK-NEXT: movl $1, 264(%rsp)
 ; CHECK-NEXT: movl -128(%rsp), %eax
diff --git a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll
--- a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll
@@ -11,9 +11,11 @@
 ;CHECK-LABEL: foo:
 ;CHECK: # %bb.0:
 ;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+;CHECK-NEXT: .cfi_adjust_cfa_offset 4096
 ; it's important that we don't use the call as a probe here
 ;CHECK-NEXT: movq $0, (%rsp)
 ;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48
+;CHECK-NEXT: .cfi_adjust_cfa_offset 3912
 ;CHECK-NEXT: .cfi_def_cfa_offset 8016
 ;CHECK-NEXT: movq %rsp, %rdi
 ;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40
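
Note (editor's sketch, not part of the patch): the CFA bookkeeping introduced above is easiest to sanity-check numerically. The standalone C++ sketch below models which .cfi_adjust_cfa_offset values the no-frame-pointer loop-probe path emits for a given allocation, assuming it mirrors the BoundOffset/TailOffset arithmetic in the hunks above and that the whole 71880-byte allocation in stack-clash-large.ll goes through that path; cfaAdjustments is a hypothetical helper, not an LLVM API.

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical model (not LLVM code): the .cfi_adjust_cfa_offset values the
// patched loop-probe path would emit when !HasFP && NeedsDwarfCFI.
std::vector<uint64_t> cfaAdjustments(uint64_t Offset, uint64_t StackProbeSize) {
  std::vector<uint64_t> Adjust;
  // One adjustment for the loop bound subtracted from the stack pointer up
  // front (BoundOffset in the patch)...
  uint64_t BoundOffset = Offset / StackProbeSize * StackProbeSize;
  if (BoundOffset)
    Adjust.push_back(BoundOffset);
  // ...and one for the tail smaller than a page (TailOffset in the patch).
  uint64_t TailOffset = Offset % StackProbeSize;
  if (TailOffset)
    Adjust.push_back(TailOffset);
  return Adjust;
}

int main() {
  // Mirrors the stack-clash-large.ll expectations: adjustments of 69632 and
  // 2248 sum to 71880, i.e. the final .cfi_def_cfa_offset of 71888 minus the
  // 8 bytes already covered by the return address.
  auto A = cfaAdjustments(71880, 4096);
  assert(A.size() == 2 && A[0] == 69632 && A[1] == 2248);
  return 0;
}

Because .cfi_adjust_cfa_offset is relative to the current CFA offset, these directives compose with whatever the prologue has already allocated, which is presumably why the patch uses MCCFIInstruction::createAdjustCfaOffset here rather than absolute .cfi_def_cfa_offset values.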