Index: llvm/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86FrameLowering.cpp +++ llvm/lib/Target/X86/X86FrameLowering.cpp @@ -636,16 +636,16 @@ MF.insert(MBBIter, testMBB); MF.insert(MBBIter, tailMBB); - Register FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D; - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr) + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); // save loop bound { const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr) - .addReg(FinalStackPtr) + BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed) + .addReg(FinalStackProbed) .addImm(Offset / StackProbeSize * StackProbeSize) .setMIFlag(MachineInstr::FrameSetup); } @@ -669,13 +669,13 @@ // cmp with stack pointer bound BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) .addReg(StackPtr) - .addReg(FinalStackPtr) + .addReg(FinalStackProbed) .setMIFlag(MachineInstr::FrameSetup); // jump BuildMI(testMBB, DL, TII.get(X86::JCC_1)) .addMBB(testMBB) - .addImm(X86::COND_L) + .addImm(X86::COND_NE) .setMIFlag(MachineInstr::FrameSetup); testMBB->addSuccessor(testMBB); testMBB->addSuccessor(tailMBB); @@ -686,10 +686,12 @@ MBB.addSuccessor(testMBB); // handle tail - if (Offset % StackProbeSize) { - BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY), - StackPtr) - .addReg(FinalStackPtr) + unsigned TailOffset = Offset % StackProbeSize; + if (TailOffset) { + const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); + BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(TailOffset) .setMIFlag(MachineInstr::FrameSetup); } Index: llvm/test/CodeGen/X86/stack-clash-large.ll =================================================================== --- llvm/test/CodeGen/X86/stack-clash-large.ll +++ llvm/test/CodeGen/X86/stack-clash-large.ll @@ -22,9 +22,9 @@ ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: movq $0, (%rsp) ; CHECK-X86-64-NEXT: cmpq %r11, %rsp -; CHECK-X86-64-NEXT: jl .LBB0_1 +; CHECK-X86-64-NEXT: jne .LBB0_1 ; CHECK-X86-64-NEXT:# %bb.2: -; CHECK-X86-64-NEXT: movq %r11, %rsp +; CHECK-X86-64-NEXT: subq $2248, %rsp ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X86-64-NEXT: movl $1, 264(%rsp) ; CHECK-X86-64-NEXT: movl $1, 28664(%rsp) @@ -41,9 +41,9 @@ ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: movl $0, (%esp) ; CHECK-X86-32-NEXT: cmpl %r11d, %esp -; CHECK-X86-32-NEXT: jl .LBB0_1 +; CHECK-X86-32-NEXT: jne .LBB0_1 ; CHECK-X86-32-NEXT:# %bb.2: -; CHECK-X86-32-NEXT: movl %r11d, %esp +; CHECK-X86-32-NEXT: subl $2380, %esp ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 ; CHECK-X86-32-NEXT: movl $1, 392(%esp) ; CHECK-X86-32-NEXT: movl $1, 28792(%esp)