diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -660,7 +660,6 @@ const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); @@ -671,16 +670,14 @@ // If the offset is so small it fits within a page, there's nothing to do. if (StackProbeSize < Offset + AlignOffset) { - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize - AlignOffset) - .setMIFlag(MachineInstr::FrameSetup); + uint64_t StackAdjustment = StackProbeSize - AlignOffset; + BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false) + .setMIFlag(MachineInstr::FrameSetup); if (!HasFP && NeedsDwarfCFI) { - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createAdjustCfaOffset( - nullptr, StackProbeSize - AlignOffset)); + BuildCFI( + MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); } - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) .setMIFlag(MachineInstr::FrameSetup), @@ -695,11 +692,8 @@ // natural probes but it implies much more logic and there was very few // interesting natural probes to interleave. while (CurrentOffset + StackProbeSize < Offset) { - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false) + .setMIFlag(MachineInstr::FrameSetup); if (!HasFP && NeedsDwarfCFI) { BuildCFI( @@ -717,13 +711,20 @@ // No need to probe the tail, it is smaller than a Page. uint64_t ChunkSize = Offset - CurrentOffset; - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(ChunkSize) - .setMIFlag(MachineInstr::FrameSetup); + if (ChunkSize == SlotSize) { + // Use push for slot sized adjustments as a size optimization, + // like emitSPUpdate does when not probing. + unsigned Reg = Is64Bit ? X86::RAX : X86::EAX; + unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r; + BuildMI(MBB, MBBI, DL, TII.get(Opc)) + .addReg(Reg, RegState::Undef) + .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false) + .setMIFlag(MachineInstr::FrameSetup); + } // No need to adjust Dwarf CFA offset here, the last position of the stack has // been defined - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } void X86FrameLowering::emitStackProbeInlineGenericLoop( @@ -732,6 +733,9 @@ uint64_t AlignOffset) const { assert(Offset && "null offset"); + assert(!MBB.isLiveIn(X86::EFLAGS) && + "Inline stack probe loop will clobber live EFLAGS."); + const bool NeedsDwarfCFI = needsDwarfCFI(MF); const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); @@ -742,12 +746,8 @@ if (AlignOffset) { if (AlignOffset < StackProbeSize) { // Perform a first smaller allocation followed by a probe. - const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) - .addReg(StackPtr) - .addImm(AlignOffset) - .setMIFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false) + .setMIFlag(MachineInstr::FrameSetup); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) .setMIFlag(MachineInstr::FrameSetup), @@ -806,13 +806,9 @@ } // allocate a page - { - const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); - BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) - .addReg(StackPtr) - .addImm(StackProbeSize) - .setMIFlag(MachineInstr::FrameSetup); - } + BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize, + /*InEpilogue=*/false) + .setMIFlag(MachineInstr::FrameSetup); // touch the page addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc)) @@ -841,13 +837,11 @@ MBB.addSuccessor(testMBB); // handle tail - const unsigned TailOffset = Offset % StackProbeSize; + const uint64_t TailOffset = Offset % StackProbeSize; MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); if (TailOffset) { - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); - BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(TailOffset) + BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset, + /*InEpilogue=*/false) .setMIFlag(MachineInstr::FrameSetup); } @@ -879,6 +873,9 @@ const TargetInstrInfo &TII = *STI.getInstrInfo(); const BasicBlock *LLVM_BB = MBB.getBasicBlock(); + assert(!MBB.isLiveIn(X86::EFLAGS) && + "Inline stack probe loop will clobber live EFLAGS."); + // RAX contains the number of bytes of desired stack adjustment. // The handling here assumes this value has already been updated so as to // maintain stack alignment. @@ -1115,6 +1112,9 @@ report_fatal_error("Emitting stack probe calls on 64-bit with the large " "code model and indirect thunks not yet implemented."); + assert(!MBB.isLiveIn(X86::EFLAGS) && + "Stack probe calls will clobber live EFLAGS."); + unsigned CallOp; if (Is64Bit) CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; @@ -3510,6 +3510,7 @@ return true; // If stack probes have to loop inline or call, that will clobber EFLAGS. + // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock. const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF)) diff --git a/llvm/test/CodeGen/X86/probe-stack-eflags.ll b/llvm/test/CodeGen/X86/probe-stack-eflags.ll --- a/llvm/test/CodeGen/X86/probe-stack-eflags.ll +++ b/llvm/test/CodeGen/X86/probe-stack-eflags.ll @@ -13,7 +13,7 @@ ; CHECK: # %bb.0: # %bb13.i ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # %bb.2: # %bb16.i