diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -192,6 +192,10 @@ bool has128ByteRedZone(const MachineFunction& MF) const; private: + bool isWin64Prologue(const MachineFunction &MF) const; + + bool needsDwarfCFI(const MachineFunction &MF) const; + uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; /// Emit target stack probe as a call to a helper function diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -553,6 +553,8 @@ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, uint64_t AlignOffset) const { + const bool NeedsDwarfCFI = needsDwarfCFI(MF); + const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); @@ -572,6 +574,12 @@ .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + if (!HasFP && NeedsDwarfCFI) { + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset( + nullptr, StackProbeSize - AlignOffset)); + } + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) .setMIFlag(MachineInstr::FrameSetup), StackPtr, false, 0) @@ -591,6 +599,11 @@ .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 
+ if (!HasFP && NeedsDwarfCFI) { + BuildCFI( + MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize)); + } addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) .setMIFlag(MachineInstr::FrameSetup), @@ -607,6 +620,8 @@ .addReg(StackPtr) .addImm(ChunkSize) .setMIFlag(MachineInstr::FrameSetup); + // No need to adjust Dwarf CFA offset here, the last position of the stack has + // been defined MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } @@ -616,6 +631,8 @@ uint64_t AlignOffset) const { assert(Offset && "null offset"); + const bool NeedsDwarfCFI = needsDwarfCFI(MF); + const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; @@ -652,18 +669,27 @@ MF.insert(MBBIter, testMBB); MF.insert(MBBIter, tailMBB); - Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + Register FinalStackProbed = Uses64BitFramePtr ? 
X86::RAX : X86::EAX; BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); // save loop bound { + const uint64_t BoundOffset = Offset / StackProbeSize * StackProbeSize; const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) .addReg(FinalStackProbed) - .addImm(Offset / StackProbeSize * StackProbeSize) + .addImm(BoundOffset) .setMIFlag(MachineInstr::FrameSetup); + + if (!HasFP && NeedsDwarfCFI) { + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, TRI->getDwarfRegNum(FinalStackProbed, true))); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset)); + } } // allocate a page @@ -703,14 +729,21 @@ // handle tail unsigned TailOffset = Offset % StackProbeSize; + MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); if (TailOffset) { const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); - BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) + BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(TailOffset) .setMIFlag(MachineInstr::FrameSetup); } + if (!HasFP && NeedsDwarfCFI) { + BuildCFI(*tailMBB, TailMBBIter, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, TRI->getDwarfRegNum(StackPtr, true))); + } + // Update Live In information recomputeLiveIns(*testMBB); recomputeLiveIns(*tailMBB); @@ -1200,6 +1233,13 @@ return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); } +bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); +} + +bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { + return !isWin64Prologue(MF) && MF.needsFrameMoves(); +} /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. 
Adjust the stack pointer to allocate @@ -1305,13 +1345,13 @@ MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; bool IsClrFunclet = IsFunclet && FnHasClrFunclet; bool HasFP = hasFP(MF); - bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool IsWin64Prologue = isWin64Prologue(MF); bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); // FIXME: Emit FPO data for EH funclets. bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; - bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves(); + bool NeedsDwarfCFI = needsDwarfCFI(MF); Register FramePtr = TRI->getFrameRegister(MF); const Register MachineFramePtr = STI.isTarget64BitILP32() diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll @@ -57,12 +57,12 @@ ; CHECK-NEXT: movq %r11, %rsp ; CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT:.LBB1_4: -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000 +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: subq $73728, %rax # imm = 0x12000 ; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: cmpq %rax, %rsp ; CHECK-NEXT: jne .LBB1_5 ; CHECK-NEXT:# %bb.6: ; CHECK-NEXT: movl $1, 392(%rsp) diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -16,15 +16,18 @@ ; CHECK-X86-64-LABEL: foo: ; CHECK-X86-64: # %bb.0: -; CHECK-X86-64-NEXT: movq %rsp, %r11 -; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X86-64-NEXT: movq %rsp, %rax +; CHECK-X86-64-NEXT: subq 
$69632, %rax # imm = 0x11000 +; CHECK-X86-64-NEXT: .cfi_def_cfa_register %rax +; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 69632 ; CHECK-X86-64-NEXT: .LBB0_1: ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: movq $0, (%rsp) -; CHECK-X86-64-NEXT: cmpq %r11, %rsp +; CHECK-X86-64-NEXT: cmpq %rax, %rsp ; CHECK-X86-64-NEXT: jne .LBB0_1 ; CHECK-X86-64-NEXT:# %bb.2: -; CHECK-X86-64-NEXT: subq $2248, %rsp +; CHECK-X86-64-NEXT: subq $2248, %rsp +; CHECK-X86-64-NEXT: .cfi_def_cfa_register %rsp ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X86-64-NEXT: movl $1, 264(%rsp) ; CHECK-X86-64-NEXT: movl $1, 28664(%rsp) @@ -35,15 +38,18 @@ ; CHECK-X86-32-LABEL: foo: ; CHECK-X86-32: # %bb.0: -; CHECK-X86-32-NEXT: movl %esp, %r11d -; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X86-32-NEXT: movl %esp, %eax +; CHECK-X86-32-NEXT: subl $69632, %eax # imm = 0x11000 +; CHECK-X86-32-NEXT: .cfi_def_cfa_register %eax +; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 69632 ; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: movl $0, (%esp) -; CHECK-X86-32-NEXT: cmpl %r11d, %esp +; CHECK-X86-32-NEXT: cmpl %eax, %esp ; CHECK-X86-32-NEXT: jne .LBB0_1 ; CHECK-X86-32-NEXT:# %bb.2: ; CHECK-X86-32-NEXT: subl $2380, %esp +; CHECK-X86-32-NEXT: .cfi_def_cfa_register %esp ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 ; CHECK-X86-32-NEXT: movl $1, 392(%esp) ; CHECK-X86-32-NEXT: movl $1, 28792(%esp) diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -8,6 +8,7 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4096 ; 
CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8 ; CHECK-NEXT: .cfi_def_cfa_offset 5888 diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll @@ -9,6 +9,7 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4096 ; CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 ; CHECK-NEXT: .cfi_def_cfa_offset 7888 diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll --- a/llvm/test/CodeGen/X86/stack-clash-medium.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll @@ -14,6 +14,7 @@ ; CHECK-X86-64-LABEL: foo: ; CHECK-X86-64: # %bb.0: ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 4096 ; CHECK-X86-64-NEXT: movq $0, (%rsp) ; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8 ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888 @@ -27,6 +28,7 @@ ; CHECK-X86-32-LABEL: foo: ; CHECK-X86-32: # %bb.0: ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 4096 ; CHECK-X86-32-NEXT: movl $0, (%esp) ; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016 diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll --- a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll @@ -55,12 +55,12 @@ ; CHECK-NEXT: movq %r11, %rsp ; CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT:.LBB1_4: -; CHECK-NEXT: movq %rsp, %r11 -; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000 +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: subq $65536, %rax # imm = 0x10000 ; CHECK-NEXT:.LBB1_5: # =>This Inner 
Loop Header: Depth=1 ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: cmpq %rax, %rsp ; CHECK-NEXT: jne .LBB1_5 ; CHECK-NEXT:# %bb.6: ; CHECK-NEXT: movl $1, 392(%rsp) diff --git a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll --- a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll +++ b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll @@ -11,6 +11,7 @@ ;CHECK-LABEL: foo: ;CHECK: # %bb.0: ;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +;CHECK-NEXT: .cfi_adjust_cfa_offset 4096 ; it's important that we don't use the call as a probe here ;CHECK-NEXT: movq $0, (%rsp) ;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48