diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -636,16 +636,16 @@
   MF.insert(MBBIter, testMBB);
   MF.insert(MBBIter, tailMBB);
 
-  Register FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D;
-  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr)
+  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
       .addReg(StackPtr)
       .setMIFlag(MachineInstr::FrameSetup);
 
   // save loop bound
   {
     const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr)
-        .addReg(FinalStackPtr)
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+        .addReg(FinalStackProbed)
         .addImm(Offset / StackProbeSize * StackProbeSize)
         .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -669,13 +669,13 @@
   // cmp with stack pointer bound
   BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
       .addReg(StackPtr)
-      .addReg(FinalStackPtr)
+      .addReg(FinalStackProbed)
       .setMIFlag(MachineInstr::FrameSetup);
 
   // jump
   BuildMI(testMBB, DL, TII.get(X86::JCC_1))
       .addMBB(testMBB)
-      .addImm(X86::COND_L)
+      .addImm(X86::COND_NE)
       .setMIFlag(MachineInstr::FrameSetup);
   testMBB->addSuccessor(testMBB);
   testMBB->addSuccessor(tailMBB);
@@ -686,10 +686,12 @@
   MBB.addSuccessor(testMBB);
 
   // handle tail
-  if (Offset % StackProbeSize) {
-    BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY),
-            StackPtr)
-        .addReg(FinalStackPtr)
+  unsigned TailOffset = Offset % StackProbeSize;
+  if (TailOffset) {
+    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
+    BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(TailOffset)
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31800,8 +31800,8 @@
   BuildMI(testMBB, DL,
           TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-      .addReg(physSPReg)
-      .addReg(FinalStackPtr);
+      .addReg(FinalStackPtr)
+      .addReg(physSPReg);
 
   BuildMI(testMBB, DL, TII->get(X86::JCC_1))
       .addMBB(tailMBB)
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -23,12 +23,12 @@
 ; CHECK-X86-64-NEXT: leaq 15(,%rcx,4), %rcx
 ; CHECK-X86-64-NEXT: andq $-16, %rcx
 ; CHECK-X86-64-NEXT: subq %rcx, %rax
-; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: cmpq %rsp, %rax
 ; CHECK-X86-64-NEXT: jl .LBB0_3
 ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-X86-64-NEXT: movq $0, (%rsp)
 ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
-; CHECK-X86-64-NEXT: cmpq %rax, %rsp
+; CHECK-X86-64-NEXT: cmpq %rsp, %rax
 ; CHECK-X86-64-NEXT: jge .LBB0_2
 ; CHECK-X86-64-NEXT: .LBB0_3:
 ; CHECK-X86-64-NEXT: movq %rax, %rsp
@@ -53,12 +53,12 @@
 ; CHECK-X86-32-NEXT: leal 15(,%ecx,4), %ecx
 ; CHECK-X86-32-NEXT: andl $-16, %ecx
 ; CHECK-X86-32-NEXT: subl %ecx, %eax
-; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: cmpl %esp, %eax
 ; CHECK-X86-32-NEXT: jl .LBB0_3
 ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-X86-32-NEXT: movl $0, (%esp)
 ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
-; CHECK-X86-32-NEXT: cmpl %eax, %esp
+; CHECK-X86-32-NEXT: cmpl %esp, %eax
 ; CHECK-X86-32-NEXT: jge .LBB0_2
 ; CHECK-X86-32-NEXT: .LBB0_3:
 ; CHECK-X86-32-NEXT: movl %eax, %esp
diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll
--- a/llvm/test/CodeGen/X86/stack-clash-large.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large.ll
@@ -22,9 +22,9 @@
 ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
 ; CHECK-X86-64-NEXT: movq $0, (%rsp)
 ; CHECK-X86-64-NEXT: cmpq %r11, %rsp
-; CHECK-X86-64-NEXT: jl .LBB0_1
+; CHECK-X86-64-NEXT: jne .LBB0_1
 ; CHECK-X86-64-NEXT:# %bb.2:
-; CHECK-X86-64-NEXT: movq %r11, %rsp
+; CHECK-X86-64-NEXT: subq $2248, %rsp
 ; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888
 ; CHECK-X86-64-NEXT: movl $1, 264(%rsp)
 ; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)
@@ -41,9 +41,9 @@
 ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
 ; CHECK-X86-32-NEXT: movl $0, (%esp)
 ; CHECK-X86-32-NEXT: cmpl %r11d, %esp
-; CHECK-X86-32-NEXT: jl .LBB0_1
+; CHECK-X86-32-NEXT: jne .LBB0_1
 ; CHECK-X86-32-NEXT:# %bb.2:
-; CHECK-X86-32-NEXT: movl %r11d, %esp
+; CHECK-X86-32-NEXT: subl $2380, %esp
 ; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016
 ; CHECK-X86-32-NEXT: movl $1, 392(%esp)
 ; CHECK-X86-32-NEXT: movl $1, 28792(%esp)