Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -140,6 +140,128 @@
   };
 }
 
+/// Atomic opcode table
+///
+enum AtomicOpc {
+  ADD,
+  SUB,
+  INC,
+  DEC,
+  OR,
+  AND,
+  XOR,
+  AtomicOpcEnd
+};
+
+enum AtomicSz {
+  ConstantI8,
+  I8,
+  SextConstantI16,
+  ConstantI16,
+  I16,
+  SextConstantI32,
+  ConstantI32,
+  I32,
+  SextConstantI64,
+  ConstantI64,
+  I64,
+  AtomicSzEnd
+};
+
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+  {
+    X86::LOCK_ADD8mi,
+    X86::LOCK_ADD8mr,
+    X86::LOCK_ADD16mi8,
+    X86::LOCK_ADD16mi,
+    X86::LOCK_ADD16mr,
+    X86::LOCK_ADD32mi8,
+    X86::LOCK_ADD32mi,
+    X86::LOCK_ADD32mr,
+    X86::LOCK_ADD64mi8,
+    X86::LOCK_ADD64mi32,
+    X86::LOCK_ADD64mr,
+  },
+  {
+    X86::LOCK_SUB8mi,
+    X86::LOCK_SUB8mr,
+    X86::LOCK_SUB16mi8,
+    X86::LOCK_SUB16mi,
+    X86::LOCK_SUB16mr,
+    X86::LOCK_SUB32mi8,
+    X86::LOCK_SUB32mi,
+    X86::LOCK_SUB32mr,
+    X86::LOCK_SUB64mi8,
+    X86::LOCK_SUB64mi32,
+    X86::LOCK_SUB64mr,
+  },
+  {
+    0,
+    X86::LOCK_INC8m,
+    0,
+    0,
+    X86::LOCK_INC16m,
+    0,
+    0,
+    X86::LOCK_INC32m,
+    0,
+    0,
+    X86::LOCK_INC64m,
+  },
+  {
+    0,
+    X86::LOCK_DEC8m,
+    0,
+    0,
+    X86::LOCK_DEC16m,
+    0,
+    0,
+    X86::LOCK_DEC32m,
+    0,
+    0,
+    X86::LOCK_DEC64m,
+  },
+  {
+    X86::LOCK_OR8mi,
+    X86::LOCK_OR8mr,
+    X86::LOCK_OR16mi8,
+    X86::LOCK_OR16mi,
+    X86::LOCK_OR16mr,
+    X86::LOCK_OR32mi8,
+    X86::LOCK_OR32mi,
+    X86::LOCK_OR32mr,
+    X86::LOCK_OR64mi8,
+    X86::LOCK_OR64mi32,
+    X86::LOCK_OR64mr,
+  },
+  {
+    X86::LOCK_AND8mi,
+    X86::LOCK_AND8mr,
+    X86::LOCK_AND16mi8,
+    X86::LOCK_AND16mi,
+    X86::LOCK_AND16mr,
+    X86::LOCK_AND32mi8,
+    X86::LOCK_AND32mi,
+    X86::LOCK_AND32mr,
+    X86::LOCK_AND64mi8,
+    X86::LOCK_AND64mi32,
+    X86::LOCK_AND64mr,
+  },
+  {
+    X86::LOCK_XOR8mi,
+    X86::LOCK_XOR8mr,
+    X86::LOCK_XOR16mi8,
+    X86::LOCK_XOR16mi,
+    X86::LOCK_XOR16mr,
+    X86::LOCK_XOR32mi8,
+    X86::LOCK_XOR32mi,
+    X86::LOCK_XOR32mr,
+    X86::LOCK_XOR64mi8,
+    X86::LOCK_XOR64mi32,
+    X86::LOCK_XOR64mr,
+  }
+};
+
 namespace {
   //===--------------------------------------------------------------------===//
   /// ISel - X86 specific code to select X86 machine instructions for
@@ -222,6 +344,9 @@
                                  SDValue &Segment,
                                  SDValue &NodeWithChain);
 
+    SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, SDLoc dl,
+                                             enum AtomicOpc &Op, MVT NVT, SDValue Val);
+
     bool TryFoldLoad(SDNode *P, SDValue N,
                      SDValue &Base, SDValue &Scale,
                      SDValue &Index, SDValue &Disp,
@@ -1575,138 +1700,15 @@
   return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
 }
 
-/// Atomic opcode table
-///
-enum AtomicOpc {
-  ADD,
-  SUB,
-  INC,
-  DEC,
-  OR,
-  AND,
-  XOR,
-  AtomicOpcEnd
-};
-
-enum AtomicSz {
-  ConstantI8,
-  I8,
-  SextConstantI16,
-  ConstantI16,
-  I16,
-  SextConstantI32,
-  ConstantI32,
-  I32,
-  SextConstantI64,
-  ConstantI64,
-  I64,
-  AtomicSzEnd
-};
-
-static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
-  {
-    X86::LOCK_ADD8mi,
-    X86::LOCK_ADD8mr,
-    X86::LOCK_ADD16mi8,
-    X86::LOCK_ADD16mi,
-    X86::LOCK_ADD16mr,
-    X86::LOCK_ADD32mi8,
-    X86::LOCK_ADD32mi,
-    X86::LOCK_ADD32mr,
-    X86::LOCK_ADD64mi8,
-    X86::LOCK_ADD64mi32,
-    X86::LOCK_ADD64mr,
-  },
-  {
-    X86::LOCK_SUB8mi,
-    X86::LOCK_SUB8mr,
-    X86::LOCK_SUB16mi8,
-    X86::LOCK_SUB16mi,
-    X86::LOCK_SUB16mr,
-    X86::LOCK_SUB32mi8,
-    X86::LOCK_SUB32mi,
-    X86::LOCK_SUB32mr,
-    X86::LOCK_SUB64mi8,
-    X86::LOCK_SUB64mi32,
-    X86::LOCK_SUB64mr,
-  },
-  {
-    0,
-    X86::LOCK_INC8m,
-    0,
-    0,
-    X86::LOCK_INC16m,
-    0,
-    0,
-    X86::LOCK_INC32m,
-    0,
-    0,
-    X86::LOCK_INC64m,
-  },
-  {
-    0,
-    X86::LOCK_DEC8m,
-    0,
-    0,
-    X86::LOCK_DEC16m,
-    0,
-    0,
-    X86::LOCK_DEC32m,
-    0,
-    0,
-    X86::LOCK_DEC64m,
-  },
-  {
-    X86::LOCK_OR8mi,
-    X86::LOCK_OR8mr,
-    X86::LOCK_OR16mi8,
-    X86::LOCK_OR16mi,
-    X86::LOCK_OR16mr,
-    X86::LOCK_OR32mi8,
-    X86::LOCK_OR32mi,
-    X86::LOCK_OR32mr,
-    X86::LOCK_OR64mi8,
-    X86::LOCK_OR64mi32,
-    X86::LOCK_OR64mr,
-  },
-  {
-    X86::LOCK_AND8mi,
-    X86::LOCK_AND8mr,
-    X86::LOCK_AND16mi8,
-    X86::LOCK_AND16mi,
-    X86::LOCK_AND16mr,
-    X86::LOCK_AND32mi8,
-    X86::LOCK_AND32mi,
-    X86::LOCK_AND32mr,
-    X86::LOCK_AND64mi8,
-    X86::LOCK_AND64mi32,
-    X86::LOCK_AND64mr,
-  },
-  {
-    X86::LOCK_XOR8mi,
-    X86::LOCK_XOR8mr,
-    X86::LOCK_XOR16mi8,
-    X86::LOCK_XOR16mi,
-    X86::LOCK_XOR16mr,
-    X86::LOCK_XOR32mi8,
-    X86::LOCK_XOR32mi,
-    X86::LOCK_XOR32mr,
-    X86::LOCK_XOR64mi8,
-    X86::LOCK_XOR64mi32,
-    X86::LOCK_XOR64mr,
-  }
-};
-
 // Return the target constant operand for atomic-load-op and do simple
 // translations, such as from atomic-load-add to lock-sub. The return value is
 // one of the following 3 cases:
 // + target-constant, the operand could be supported as a target constant.
 // + empty, the operand is not needed any more with the new op selected.
 // + non-empty, otherwise.
-static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
-                                                SDLoc dl,
-                                                enum AtomicOpc &Op, MVT NVT,
-                                                SDValue Val) {
+SDValue X86DAGToDAGISel::
+getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, SDLoc dl,
+                                 enum AtomicOpc &Op, MVT NVT, SDValue Val) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
     int64_t CNVal = CN->getSExtValue();
     // Quit if not 32-bit imm.
@@ -1721,7 +1723,7 @@
   // For atomic-load-add, we could do some optimizations.
   if (Op == ADD) {
     // Translate to INC/DEC if ADD by 1 or -1.
-    if ((CNVal == 1) || (CNVal == -1)) {
+    if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
       Op = (CNVal == 1) ? INC : DEC;
       // No more constant operand after being translated into INC/DEC.
       return SDValue();
Index: test/CodeGen/X86/atomic_add.ll
===================================================================
--- test/CodeGen/X86/atomic_add.ll
+++ test/CodeGen/X86/atomic_add.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
 ; rdar://7103704
 
@@ -14,6 +15,8 @@
 entry:
 ; CHECK-LABEL: inc4:
 ; CHECK: incq
+; SLOW_INC-LABEL: inc4:
+; SLOW_INC-NOT: incq
   %0 = atomicrmw add i64* %p, i64 1 monotonic
   ret void
 }
@@ -39,6 +42,8 @@
 entry:
 ; CHECK-LABEL: inc3:
 ; CHECK: incb
+; SLOW_INC-LABEL: inc3:
+; SLOW_INC-NOT: incb
   %0 = atomicrmw add i8* %p, i8 1 monotonic
   ret void
 }
@@ -64,6 +69,8 @@
 entry:
 ; CHECK-LABEL: inc2:
 ; CHECK: incw
+; SLOW_INC-LABEL: inc2:
+; SLOW_INC-NOT: incw
   %0 = atomicrmw add i16* %p, i16 1 monotonic
   ret void
 }
@@ -89,6 +96,8 @@
 entry:
 ; CHECK-LABEL: inc1:
 ; CHECK: incl
+; SLOW_INC-LABEL: inc1:
+; SLOW_INC-NOT: incl
   %0 = atomicrmw add i32* %p, i32 1 monotonic
   ret void
 }
@@ -113,6 +122,8 @@
 entry:
 ; CHECK-LABEL: dec4:
 ; CHECK: decq
+; SLOW_INC-LABEL: dec4:
+; SLOW_INC-NOT: decq
   %0 = atomicrmw sub i64* %p, i64 1 monotonic
   ret void
 }
@@ -138,6 +149,8 @@
 entry:
 ; CHECK-LABEL: dec3:
 ; CHECK: decb
+; SLOW_INC-LABEL: dec3:
+; SLOW_INC-NOT: decb
   %0 = atomicrmw sub i8* %p, i8 1 monotonic
   ret void
 }
@@ -163,6 +176,8 @@
 entry:
 ; CHECK-LABEL: dec2:
 ; CHECK: decw
+; SLOW_INC-LABEL: dec2:
+; SLOW_INC-NOT: decw
   %0 = atomicrmw sub i16* %p, i16 1 monotonic
   ret void
 }
@@ -189,6 +204,8 @@
 entry:
 ; CHECK-LABEL: dec1:
 ; CHECK: decl
+; SLOW_INC-LABEL: dec1:
+; SLOW_INC-NOT: decl
   %0 = atomicrmw sub i32* %p, i32 1 monotonic
   ret void
 }
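
For reference, a minimal standalone reproduction of the behavior the new RUN line exercises; the file name, function name, and the exact operands in the expected assembly are illustrative, not part of the patch. With the default subtarget the atomic add-by-one is still selected as a locked increment, while with -mattr=slow-incdec it should instead come out as a locked add of the immediate (for the i64 case, something like "lock addq $1, (%rdi)" rather than "lock incq (%rdi)"):

  ; slow_incdec_example.ll -- hypothetical test input, not included in this patch.
  ; Compare:
  ;   llc < slow_incdec_example.ll -march=x86-64
  ;   llc < slow_incdec_example.ll -march=x86-64 -mattr=slow-incdec
  define void @inc_example(i64* %p) nounwind {
  entry:
    %0 = atomicrmw add i64* %p, i64 1 monotonic
    ret void
  }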