Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -24505,6 +24505,11 @@ MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case X86::TAILJMPd64: @@ -24558,8 +24563,6 @@ case X86::RDFLAGS32: case X86::RDFLAGS64: { - DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); unsigned PushF = MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; @@ -24577,8 +24580,6 @@ case X86::WRFLAGS32: case X86::WRFLAGS64: { - DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); unsigned Push = MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; unsigned PopF = @@ -24603,19 +24604,15 @@ case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { - MachineFunction *F = BB->getParent(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); - // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. - int CWFrameIdx = F->getFrameInfo().CreateStackObject(2, 2, false); + int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... 
unsigned OldCW = - F->getRegInfo().createVirtualRegister(&X86::GR16RegClass); + MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); @@ -24723,6 +24720,45 @@ case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + case X86::LCMPXCHG8B: { + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B requires + // a memory operand. If it happens that current architecture is i686 and for + // current function we need a base pointer - which is ESI for i686 - + // register allocator would not be able to allocate registers for an address + // in form of X(%reg, %reg, Y) - there never would be enough unreserved + // registers during regalloc (without the need for base ptr the only option + // would be X(%edi, %esi, Y)). + // We are giving a hand to register allocator by precomputing the address in + // a new vreg using LEA. + if (TRI->hasBasePointer(*MF) && TRI->getBaseRegister() == X86::ESI) { + MVT SPTy = getPointerTy(MF->getDataLayout()); + const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); + unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); + + X86AddressMode AM = getAddressFromInstr(&MI, 0); + // Regalloc does not need any help when the memory operand of CMPXCHG8B + // does not use an index register. + if (AM.IndexReg == X86::NoRegister) + return BB; + + // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its + // four operand definitions that are E[ABCD] registers. We skip them and + // then insert the LEA. 
+ MachineBasicBlock::iterator MBBI(MI); + while (MBBI->definesRegister(X86::EAX) || + MBBI->definesRegister(X86::EBX) || + MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX)) + --MBBI; + addFullAddress( + BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM); + + setDirectAddressInInstr(&MI, 0, computedAddrVReg); + } + return BB; + } + case X86::LCMPXCHG16B: + return BB; case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { unsigned BasePtr = Index: lib/Target/X86/X86InstrBuilder.h =================================================================== --- lib/Target/X86/X86InstrBuilder.h +++ lib/Target/X86/X86InstrBuilder.h @@ -119,6 +119,19 @@ return AM; } +/// Replace the address used in the instruction with a direct memory reference: +/// Reg (base), 1 (scale), NoReg (index), 0 (displacement), NoReg (segment). +static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, + unsigned Reg) { + // The old base/displacement need not be a register/immediate (presumably a + // frame index or global address), so change the operand kind explicitly. + MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false); + MI->getOperand(Operand + 1).setImm(1); + MI->getOperand(Operand + 2).setReg(0); + MI->getOperand(Operand + 3).ChangeToImmediate(0); + MI->getOperand(Operand + 4).setReg(0); +} + /// addDirectMem - This function is used to add a direct memory reference to the /// current instruction -- that is, a dereference of an address in a register, /// with no scale, index or displacement. An example is: DWORD PTR [EAX]. 
Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -723,7 +723,7 @@ multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, SDPatternOperator frag, X86MemOperand x86memop, InstrItinClass itin> { -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, usesCustomInserter = 1 in { def NAME : I, TB, LOCK; Index: test/CodeGen/X86/pr28755.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr28755.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -stackrealign + +define void @foo(i64* %a, i32 %off, i32 %n) { + %dummy = alloca i32, i32 %n + %addr = getelementptr inbounds i64, i64* %a, i32 %off + + %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic + ret void +}