Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24719,6 +24719,72 @@
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);
 
+  case X86::LCMPXCHG8B: {
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+    // In addition to the 4 E[ABCD] registers implied by its encoding,
+    // CMPXCHG8B requires a memory operand. If the current architecture is
+    // i686 and the current function needs a base pointer - which is ESI for
+    // i686 - the register allocator would not be able to allocate registers
+    // for an address of the form X(%reg, %reg, Y): there would never be
+    // enough unreserved registers during regalloc (without the need for a
+    // base pointer the only option would be X(%edi, %esi, Y)).
+    // We give the register allocator a hand by precomputing the address in
+    // a new vreg using LEA.
+
+    // If it is not i686 or there is no base pointer - nothing to do here.
+    if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
+      return BB;
+
+    // Even though this code does not necessarily need the base pointer to
+    // be ESI, we check for that. The reason: if this assert fails, the
+    // compiler's base pointer handling has changed, and that change most
+    // probably has to be addressed here as well.
+    assert(TRI->getBaseRegister() == X86::ESI &&
+           "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
+           "base pointer in mind");
+
+    X86AddressMode AM = getAddressFromInstr(&MI, 0);
+    // Regalloc does not need any help when the memory operand of CMPXCHG8B
+    // does not use an index register. Bail out before creating the new vreg
+    // so that no dead virtual register is left behind.
+    if (AM.IndexReg == X86::NoRegister)
+      return BB;
+
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    MVT SPTy = getPointerTy(MF->getDataLayout());
+    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+    unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
+    // four operand definitions that are E[ABCD] registers. Walk backwards
+    // over that run of definitions, never stepping past the start of the
+    // block, and insert the LEA directly in front of the first of them. The
+    // LEA therefore still follows whatever instruction precedes the run
+    // (which may be the one defining an address register).
+    auto DefinesCmpXchgReg = [](const MachineInstr &Inst) {
+      return Inst.definesRegister(X86::EAX) ||
+             Inst.definesRegister(X86::EBX) ||
+             Inst.definesRegister(X86::ECX) ||
+             Inst.definesRegister(X86::EDX);
+    };
+    MachineBasicBlock::iterator InsertPt(MI);
+    while (InsertPt != BB->begin()) {
+      MachineBasicBlock::iterator Prev = InsertPt;
+      --Prev;
+      if (!DefinesCmpXchgReg(*Prev))
+        break;
+      InsertPt = Prev;
+    }
+    addFullAddress(
+        BuildMI(*BB, InsertPt, DL, TII->get(X86::LEA32r), computedAddrVReg),
+        AM);
+
+    setDirectAddressInInstr(&MI, 0, computedAddrVReg);
+
+    return BB;
+  }
+  case X86::LCMPXCHG16B:
+    return BB;
   case X86::LCMPXCHG8B_SAVE_EBX:
   case X86::LCMPXCHG16B_SAVE_RBX: {
     unsigned BasePtr =
Index: lib/Target/X86/X86InstrBuilder.h
===================================================================
--- lib/Target/X86/X86InstrBuilder.h
+++ lib/Target/X86/X86InstrBuilder.h
@@ -119,6 +119,23 @@
   return AM;
 }
 
+/// Replace the address used in the instruction with a direct memory reference.
+///
+/// The five memory operands starting at index \p Operand are rewritten to the
+/// form: \p Reg (base), 1 (scale), no index register, 0 (displacement), no
+/// segment register.
+static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
+                                           unsigned Reg) {
+  // The base may currently be a frame index and the displacement a symbolic
+  // operand (e.g. a global address), so switch the operand kinds instead of
+  // assuming they already are a register and an immediate.
+  MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false);
+  MI->getOperand(Operand + 1).setImm(1);
+  MI->getOperand(Operand + 2).setReg(0);
+  MI->getOperand(Operand + 3).ChangeToImmediate(0);
+  MI->getOperand(Operand + 4).setReg(0);
+}
+
 /// addDirectMem - This function is used to add a direct memory reference to the
 /// current instruction -- that is, a dereference of an address in a register,
 /// with no scale, index or displacement. An example is: DWORD PTR [EAX].
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -723,7 +723,7 @@
 multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
                          SDPatternOperator frag, X86MemOperand x86memop,
                          InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
   def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
                !strconcat(mnemonic, "\t$ptr"),
                [(frag addr:$ptr)], itin>, TB, LOCK;
Index: test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s
+; PR28755
+
+; Check that the register allocator is able to handle the
+; a-lot-of-fixed-and-reserved-registers case. We do that by emitting the
+; lea before the four instructions that set up the cmpxchg8b operands.
+
+define void @foo_alloca(i64* %a, i32 %off, i32 %n) {
+  %dummy = alloca i32, i32 %n
+  %addr = getelementptr inbounds i64, i64* %a, i32 %off
+
+  %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: foo_alloca:
+; CHECK: leal {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: lock cmpxchg8b ([[REGISTER]])
+
+; If we don't use an index register in the address mode,
+; check that we did not generate the lea.
+define void @foo_alloca_direct_address(i64* %addr, i32 %n) {
+  %dummy = alloca i32, i32 %n
+
+  %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: foo_alloca_direct_address:
+; CHECK-NOT: leal {{\(%e.*\)}}, {{%e.i}}
+; CHECK: lock cmpxchg8b ({{%e..}})