diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -273,9 +273,8 @@
 
   // allocation is split in smaller chunks anyway.
   if (EmitInlineStackProbe && !InEpilogue) {
-    // stack probing may involve looping, and control flow generations is
-    // disallowed at this point. Rely to later processing through
-    // `inlineStackProbe`.
+    // Delegate stack probing to the `inlineStackProbe` mechanism to avoid
+    // complications.
     MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
 
     // Encode the static offset as a metadata attached to the stub.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31618,6 +31618,11 @@
           physSPReg)
       .addReg(physSPReg)
       .addReg(tmpSizeVReg);
+
+  // touch the tail too, as we don't have any information about the context
+  addRegOffset(BuildMI(tailMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+      .addImm(0);
+
   BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY),
           MI.getOperand(0).getReg())
       .addReg(physSPReg);
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -27,6 +27,7 @@
 ; CHECK-NEXT: .LBB0_3:
 ; CHECK-NEXT: subq %rax, %rsp
 ; CHECK-NEXT: movq %rsp, %rax
+; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: movl $1, 4792(%rax)
 ; CHECK-NEXT: movl (%rax), %eax
 ; CHECK-NEXT: movq %rbp, %rsp