diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -273,9 +273,8 @@
 
   // allocation is split in smaller chunks anyway.
   if (EmitInlineStackProbe && !InEpilogue) {
-    // stack probing may involve looping, and control flow generations is
-    // disallowed at this point. Rely to later processing through
-    // `inlineStackProbe`.
+    // Delegate stack probing to the `inlineStackProbe` mechanism to avoid
+    // complications.
     MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
 
     // Encode the static offset as a metadata attached to the stub.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31618,6 +31618,11 @@
           physSPReg)
       .addReg(physSPReg)
       .addReg(tmpSizeVReg);
+
+  // touch the tail too, as we don't have any information about the context
+  addRegOffset(BuildMI(tailMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+      .addImm(0);
+
   BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY),
           MI.getOperand(0).getReg())
       .addReg(physSPReg);
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -27,6 +27,7 @@
 ; CHECK-NEXT: .LBB0_3:
 ; CHECK-NEXT: subq %rax, %rsp
 ; CHECK-NEXT: movq %rsp, %rax
+; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: movl $1, 4792(%rax)
 ; CHECK-NEXT: movl (%rax), %eax
 ; CHECK-NEXT: movq %rbp, %rsp