Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24719,6 +24719,57 @@
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);
 
+  case X86::LCMPXCHG8B: {
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+    // Besides the four E[ABCD]X registers implied by its encoding, CMPXCHG8B
+    // needs a memory operand. On i686, when the function also needs a base
+    // pointer (ESI), the register allocator cannot assign registers to an
+    // address of the form X(%reg, %reg, Y): too few unreserved registers
+    // remain (without a base pointer, X(%edi, %esi, Y) would still fit).
+    // Help the allocator by precomputing the address into a new vreg with LEA.
+
+    // Not i686, or no base pointer: nothing to do.
+    if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
+      return BB;
+
+    // Nothing below strictly requires the base pointer to be ESI, but if this
+    // fires, base pointer handling changed and this code needs revisiting.
+    assert(TRI->getBaseRegister() == X86::ESI &&
+           "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
+           "base pointer in mind");
+
+    // Regalloc needs no help when the memory operand has no index register.
+    X86AddressMode AM = getAddressFromInstr(&MI, 0);
+    if (AM.IndexReg == X86::NoRegister)
+      return BB;
+
+    // After X86TargetLowering::ReplaceNodeResults, CMPXCHG8B is glued to the
+    // definitions of its four E[ABCD]X operands. Walk back over that run
+    // without stepping past the start of the block, and insert the LEA right
+    // before its first instruction; inserting any earlier could place the LEA
+    // ahead of the instruction that defines one of the address registers.
+    MachineBasicBlock::iterator InsertPt(MI);
+    while (InsertPt != BB->begin()) {
+      MachineBasicBlock::iterator Def = InsertPt;
+      --Def;
+      if (!Def->definesRegister(X86::EAX) && !Def->definesRegister(X86::EBX) &&
+          !Def->definesRegister(X86::ECX) && !Def->definesRegister(X86::EDX))
+        break;
+      InsertPt = Def;
+    }
+
+    // Create the vreg only now, so the early returns leave no unused vreg.
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned AddrVReg = MRI.createVirtualRegister(
+        getRegClassFor(getPointerTy(MF->getDataLayout())));
+    addFullAddress(
+        BuildMI(*BB, InsertPt, DL, TII->get(X86::LEA32r), AddrVReg), AM);
+    setDirectAddressInInstr(&MI, 0, AddrVReg);
+
+    return BB;
+  }
+  case X86::LCMPXCHG16B:
+    return BB; // No address rewrite for the 16-byte form; block is unchanged.
   case X86::LCMPXCHG8B_SAVE_EBX:
   case X86::LCMPXCHG16B_SAVE_RBX: {
     unsigned BasePtr =
Index: lib/Target/X86/X86InstrBuilder.h
===================================================================
--- lib/Target/X86/X86InstrBuilder.h
+++ lib/Target/X86/X86InstrBuilder.h
@@ -119,6 +119,16 @@
   return AM;
 }
 
+/// Replace the 5-operand address at \p Operand of \p MI with a direct [Reg].
+static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
+                                           unsigned Reg) {
+  MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false); // was reg/FI
+  MI->getOperand(Operand + 1).setImm(1);                          // scale
+  MI->getOperand(Operand + 2).setReg(0);                          // no index
+  MI->getOperand(Operand + 3).ChangeToImmediate(0);               // was imm/GV
+  MI->getOperand(Operand + 4).setReg(0);                          // no segment
+}
+
 /// addDirectMem - This function is used to add a direct memory reference to the
 /// current instruction -- that is, a dereference of an address in a register,
 /// with no scale, index or displacement. An example is: DWORD PTR [EAX].
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -723,7 +723,7 @@
 multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
                          SDPatternOperator frag, X86MemOperand x86memop,
                          InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
   def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
                !strconcat(mnemonic, "\t$ptr"),
                [(frag addr:$ptr)], itin>, TB, LOCK;
Index: test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s
+; PR28755
+
+; Check that the register allocator is able to handle this
+; a-lot-of-fixed-and-reserved-registers case. We do that by
+; emitting the lea before the 4 cmpxchg8b operand generators.
+
+define void @foo_alloca(i64* %a, i32 %off, i32 %n) {
+  %dummy = alloca i32, i32 %n ; variable-sized alloca; with -stackrealign this presumably forces a base pointer
+  %addr = getelementptr inbounds i64, i64* %a, i32 %off ; address is %a plus a scaled index, so it uses an index register
+
+  %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic ; 64-bit compare-and-swap on the computed address
+  ret void
+}
+
+; CHECK-LABEL: foo_alloca
+; CHECK: leal    {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]]
+; CHECK-NEXT: xorl    %eax, %eax
+; CHECK-NEXT: xorl    %edx, %edx
+; CHECK-NEXT: xorl    %ecx, %ecx
+; CHECK-NEXT: movl    $1, %ebx
+; CHECK-NEXT: lock            cmpxchg8b       ([[REGISTER]])
+
+; When the address mode uses no index register, no lea must be generated
+; and cmpxchg8b addresses memory through a plain register.
+define void @foo_alloca_direct_address(i64* %addr, i32 %n) {
+  %dummy = alloca i32, i32 %n ; variable-sized alloca, as in @foo_alloca
+
+  %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic ; address is a bare pointer argument
+  ret void
+}
+; No pattern variable below: one defined by a negative match is never bound.
+; CHECK-LABEL: foo_alloca_direct_address
+; CHECK-NOT: leal    {{\(%e.*\)}}, {{%e.i}}
+; CHECK: lock            cmpxchg8b       ({{%e.i}})