Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1494,12 +1494,27 @@ SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); + unsigned TargetFlags = 0; + + if (Subtarget->hasHLE()) { + HLEHint Hint = cast(Node)->getHLEHint(); + switch (Hint) { + default: + case HLENone: TargetFlags = 0x00; break; + case HLEAcquire: TargetFlags = 0x01; break; + case HLERelease: TargetFlags = 0x02; break; + } + } + + SDValue TFlag = CurDAG->getTargetConstant(TargetFlags, MVT::i8); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); - const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; + const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, TFlag, + Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, Ops, array_lengthof(Ops)); @@ -1535,97 +1550,104 @@ AtomicSzEnd }; -static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { +enum AtomicTargetFlags { + TargetFlagNone, + TargetFlagXAcquire, + TargetFlagXRelease, + AtomicTfEnd +}; + +static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd][AtomicTfEnd] = { { - X86::LOCK_ADD8mi, - X86::LOCK_ADD8mr, - X86::LOCK_ADD16mi8, - X86::LOCK_ADD16mi, - X86::LOCK_ADD16mr, - X86::LOCK_ADD32mi8, - X86::LOCK_ADD32mi, - X86::LOCK_ADD32mr, - X86::LOCK_ADD64mi8, - X86::LOCK_ADD64mi32, - X86::LOCK_ADD64mr, + { X86::LOCK_ADD8mi, X86::LOCK_ADDACQ8mi, X86::LOCK_ADDREL8mi }, + { X86::LOCK_ADD8mr, X86::LOCK_ADDACQ8mr, X86::LOCK_ADDREL8mr }, + { X86::LOCK_ADD16mi8, X86::LOCK_ADDACQ16mi8, X86::LOCK_ADDREL16mi8 }, + { X86::LOCK_ADD16mi, X86::LOCK_ADDACQ16mi, X86::LOCK_ADDREL16mi }, + { X86::LOCK_ADD16mr, X86::LOCK_ADDACQ16mr, X86::LOCK_ADDREL16mr }, + { X86::LOCK_ADD32mi8, X86::LOCK_ADDACQ32mi8, X86::LOCK_ADDREL32mi8 }, + { X86::LOCK_ADD32mi, X86::LOCK_ADDACQ32mi, X86::LOCK_ADDREL32mi }, + { X86::LOCK_ADD32mr, X86::LOCK_ADDACQ32mr, X86::LOCK_ADDREL32mr }, + { X86::LOCK_ADD64mi8, X86::LOCK_ADDACQ64mi8, X86::LOCK_ADDREL64mi8 }, + { X86::LOCK_ADD64mi32, X86::LOCK_ADDACQ64mi32, X86::LOCK_ADDREL64mi32 }, + { X86::LOCK_ADD64mr, X86::LOCK_ADDACQ64mr, X86::LOCK_ADDREL64mr } }, { - X86::LOCK_SUB8mi, - X86::LOCK_SUB8mr, - X86::LOCK_SUB16mi8, - X86::LOCK_SUB16mi, - X86::LOCK_SUB16mr, - X86::LOCK_SUB32mi8, - X86::LOCK_SUB32mi, - X86::LOCK_SUB32mr, - X86::LOCK_SUB64mi8, - X86::LOCK_SUB64mi32, - X86::LOCK_SUB64mr, + { X86::LOCK_SUB8mi, X86::LOCK_SUBACQ8mi, X86::LOCK_SUBREL8mi }, + { X86::LOCK_SUB8mr, X86::LOCK_SUBACQ8mr, X86::LOCK_SUBREL8mr }, + { X86::LOCK_SUB16mi8, X86::LOCK_SUBACQ16mi8, X86::LOCK_SUBREL16mi8 }, + { X86::LOCK_SUB16mi, X86::LOCK_SUBACQ16mi, X86::LOCK_SUBREL16mi }, + { X86::LOCK_SUB16mr, X86::LOCK_SUBACQ16mr, X86::LOCK_SUBREL16mr }, + { X86::LOCK_SUB32mi8, X86::LOCK_SUBACQ32mi8, X86::LOCK_SUBREL32mi8 }, + { X86::LOCK_SUB32mi, X86::LOCK_SUBACQ32mi, X86::LOCK_SUBREL32mi }, + { X86::LOCK_SUB32mr, X86::LOCK_SUBACQ32mr, X86::LOCK_SUBREL32mr }, + { X86::LOCK_SUB64mi8, X86::LOCK_SUBACQ64mi8, X86::LOCK_SUBREL64mi8 }, + { X86::LOCK_SUB64mi32, X86::LOCK_SUBACQ64mi32, X86::LOCK_SUBREL64mi32 }, + { X86::LOCK_SUB64mr, X86::LOCK_SUBACQ64mr, X86::LOCK_SUBREL64mr } }, { - 0, - X86::LOCK_INC8m, - 0, - 0, - X86::LOCK_INC16m, - 0, - 0, - X86::LOCK_INC32m, - 0, - 0, - X86::LOCK_INC64m, + 
{ 0, 0, 0 }, + { X86::LOCK_INC8m, X86::LOCK_INCACQ8m, X86::LOCK_INCREL8m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_INC16m, X86::LOCK_INCACQ16m, X86::LOCK_INCREL16m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_INC32m, X86::LOCK_INCACQ32m, X86::LOCK_INCREL32m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_INC64m, X86::LOCK_INCACQ64m, X86::LOCK_INCREL64m } }, { - 0, - X86::LOCK_DEC8m, - 0, - 0, - X86::LOCK_DEC16m, - 0, - 0, - X86::LOCK_DEC32m, - 0, - 0, - X86::LOCK_DEC64m, + { 0, 0, 0 }, + { X86::LOCK_DEC8m, X86::LOCK_DECACQ8m, X86::LOCK_DECREL8m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_DEC16m, X86::LOCK_DECACQ16m, X86::LOCK_DECREL16m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_DEC32m, X86::LOCK_DECACQ32m, X86::LOCK_DECREL32m }, + { 0, 0, 0 }, + { 0, 0, 0 }, + { X86::LOCK_DEC64m, X86::LOCK_DECACQ64m, X86::LOCK_DECREL64m } }, { - X86::LOCK_OR8mi, - X86::LOCK_OR8mr, - X86::LOCK_OR16mi8, - X86::LOCK_OR16mi, - X86::LOCK_OR16mr, - X86::LOCK_OR32mi8, - X86::LOCK_OR32mi, - X86::LOCK_OR32mr, - X86::LOCK_OR64mi8, - X86::LOCK_OR64mi32, - X86::LOCK_OR64mr, + { X86::LOCK_OR8mi, X86::LOCK_ORACQ8mi, X86::LOCK_ORREL8mi }, + { X86::LOCK_OR8mr, X86::LOCK_ORACQ8mr, X86::LOCK_ORREL8mr }, + { X86::LOCK_OR16mi8, X86::LOCK_ORACQ16mi8, X86::LOCK_ORREL16mi8 }, + { X86::LOCK_OR16mi, X86::LOCK_ORACQ16mi, X86::LOCK_ORREL16mi }, + { X86::LOCK_OR16mr, X86::LOCK_ORACQ16mr, X86::LOCK_ORREL16mr }, + { X86::LOCK_OR32mi8, X86::LOCK_ORACQ32mi8, X86::LOCK_ORREL32mi8 }, + { X86::LOCK_OR32mi, X86::LOCK_ORACQ32mi, X86::LOCK_ORREL32mi }, + { X86::LOCK_OR32mr, X86::LOCK_ORACQ32mr, X86::LOCK_ORREL32mr }, + { X86::LOCK_OR64mi8, X86::LOCK_ORACQ64mi8, X86::LOCK_ORREL64mi8 }, + { X86::LOCK_OR64mi32, X86::LOCK_ORACQ64mi32, X86::LOCK_ORREL64mi32 }, + { X86::LOCK_OR64mr, X86::LOCK_ORACQ64mr, X86::LOCK_ORREL64mr } }, { - X86::LOCK_AND8mi, - X86::LOCK_AND8mr, - X86::LOCK_AND16mi8, - X86::LOCK_AND16mi, - X86::LOCK_AND16mr, - X86::LOCK_AND32mi8, - X86::LOCK_AND32mi, - X86::LOCK_AND32mr, - X86::LOCK_AND64mi8, - X86::LOCK_AND64mi32, - X86::LOCK_AND64mr, + { X86::LOCK_AND8mi, X86::LOCK_ANDACQ8mi, X86::LOCK_ANDREL8mi }, + { X86::LOCK_AND8mr, X86::LOCK_ANDACQ8mr, X86::LOCK_ANDREL8mr }, + { X86::LOCK_AND16mi8, X86::LOCK_ANDACQ16mi8, X86::LOCK_ANDREL16mi8 }, + { X86::LOCK_AND16mi, X86::LOCK_ANDACQ16mi, X86::LOCK_ANDREL16mi }, + { X86::LOCK_AND16mr, X86::LOCK_ANDACQ16mr, X86::LOCK_ANDREL16mr }, + { X86::LOCK_AND32mi8, X86::LOCK_ANDACQ32mi8, X86::LOCK_ANDREL32mi8 }, + { X86::LOCK_AND32mi, X86::LOCK_ANDACQ32mi, X86::LOCK_ANDREL32mi }, + { X86::LOCK_AND32mr, X86::LOCK_ANDACQ32mr, X86::LOCK_ANDREL32mr }, + { X86::LOCK_AND64mi8, X86::LOCK_ANDACQ64mi8, X86::LOCK_ANDREL64mi8 }, + { X86::LOCK_AND64mi32, X86::LOCK_ANDACQ64mi32, X86::LOCK_ANDREL64mi32 }, + { X86::LOCK_AND64mr, X86::LOCK_ANDACQ64mr, X86::LOCK_ANDREL64mr } }, { - X86::LOCK_XOR8mi, - X86::LOCK_XOR8mr, - X86::LOCK_XOR16mi8, - X86::LOCK_XOR16mi, - X86::LOCK_XOR16mr, - X86::LOCK_XOR32mi8, - X86::LOCK_XOR32mi, - X86::LOCK_XOR32mr, - X86::LOCK_XOR64mi8, - X86::LOCK_XOR64mi32, - X86::LOCK_XOR64mr, + { X86::LOCK_XOR8mi, X86::LOCK_XORACQ8mi, X86::LOCK_XORREL8mi }, + { X86::LOCK_XOR8mr, X86::LOCK_XORACQ8mr, X86::LOCK_XORREL8mr }, + { X86::LOCK_XOR16mi8, X86::LOCK_XORACQ16mi8, X86::LOCK_XORREL16mi8 }, + { X86::LOCK_XOR16mi, X86::LOCK_XORACQ16mi, X86::LOCK_XORREL16mi }, + { X86::LOCK_XOR16mr, X86::LOCK_XORACQ16mr, X86::LOCK_XORREL16mr }, + { X86::LOCK_XOR32mi8, X86::LOCK_XORACQ32mi8, X86::LOCK_XORREL32mi8 }, + { X86::LOCK_XOR32mi, X86::LOCK_XORACQ32mi, X86::LOCK_XORREL32mi }, + { 
X86::LOCK_XOR32mr, X86::LOCK_XORACQ32mr, X86::LOCK_XORREL32mr }, + { X86::LOCK_XOR64mi8, X86::LOCK_XORACQ64mi8, X86::LOCK_XORREL64mi8 }, + { X86::LOCK_XOR64mi32, X86::LOCK_XORACQ64mi32, X86::LOCK_XORREL64mi32 }, + { X86::LOCK_XOR64mr, X86::LOCK_XORACQ64mr, X86::LOCK_XORREL64mr } } }; @@ -1690,6 +1712,18 @@ DebugLoc dl = Node->getDebugLoc(); + enum AtomicTargetFlags TFlag = TargetFlagNone; + + if (Subtarget->hasHLE()) { + HLEHint Hint = cast(Node)->getHLEHint(); + switch (Hint) { + default: + case HLENone: TFlag = TargetFlagNone; break; + case HLEAcquire: TFlag = TargetFlagXAcquire; break; + case HLERelease: TFlag = TargetFlagXRelease; break; + } + } + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. @@ -1718,7 +1752,7 @@ Op = ADD; break; } - + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); bool isUnOp = !Val.getNode(); bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); @@ -1728,35 +1762,35 @@ default: return 0; case MVT::i8: if (isCN) - Opc = AtomicOpcTbl[Op][ConstantI8]; + Opc = AtomicOpcTbl[Op][ConstantI8][TFlag]; else - Opc = AtomicOpcTbl[Op][I8]; + Opc = AtomicOpcTbl[Op][I8][TFlag]; break; case MVT::i16: if (isCN) { if (immSext8(Val.getNode())) - Opc = AtomicOpcTbl[Op][SextConstantI16]; + Opc = AtomicOpcTbl[Op][SextConstantI16][TFlag]; else - Opc = AtomicOpcTbl[Op][ConstantI16]; + Opc = AtomicOpcTbl[Op][ConstantI16][TFlag]; } else - Opc = AtomicOpcTbl[Op][I16]; + Opc = AtomicOpcTbl[Op][I16][TFlag]; break; case MVT::i32: if (isCN) { if (immSext8(Val.getNode())) - Opc = AtomicOpcTbl[Op][SextConstantI32]; + Opc = AtomicOpcTbl[Op][SextConstantI32][TFlag]; else - Opc = AtomicOpcTbl[Op][ConstantI32]; + Opc = AtomicOpcTbl[Op][ConstantI32][TFlag]; } else - Opc = AtomicOpcTbl[Op][I32]; + Opc = AtomicOpcTbl[Op][I32][TFlag]; break; case MVT::i64: - Opc = AtomicOpcTbl[Op][I64]; + Opc = AtomicOpcTbl[Op][I64][TFlag]; if (isCN) { if (immSext8(Val.getNode())) - Opc = AtomicOpcTbl[Op][SextConstantI64]; + Opc = AtomicOpcTbl[Op][SextConstantI64][TFlag]; else if (i64immSExt32(Val.getNode())) - Opc = AtomicOpcTbl[Op][ConstantI64]; + Opc = AtomicOpcTbl[Op][ConstantI64][TFlag]; } break; } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -12800,12 +12800,21 @@ } // Get CMPXCHG opcode for the specified data type. 
-static unsigned getCmpXChgOpcode(EVT VT) { +static unsigned getCmpXChgOpcode(EVT VT, unsigned TargetFlags) { + assert(((TargetFlags & 3) != 3) && "unknown 'targetflags'"); + + static unsigned CmpXChgOps[4][3] = { + { X86::LCMPXCHG8, X86::LCMPXCHGACQ8, X86::LCMPXCHGREL8 }, + { X86::LCMPXCHG16, X86::LCMPXCHGACQ16, X86::LCMPXCHGREL16 }, + { X86::LCMPXCHG32, X86::LCMPXCHGACQ32, X86::LCMPXCHGREL32 }, + { X86::LCMPXCHG64, X86::LCMPXCHGACQ64, X86::LCMPXCHGREL64 }, + }; + switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::LCMPXCHG8; - case MVT::i16: return X86::LCMPXCHG16; - case MVT::i32: return X86::LCMPXCHG32; - case MVT::i64: return X86::LCMPXCHG64; + case MVT::i8: return CmpXChgOps[0][TargetFlags]; + case MVT::i16: return CmpXChgOps[1][TargetFlags]; + case MVT::i32: return CmpXChgOps[2][TargetFlags]; + case MVT::i64: return CmpXChgOps[3][TargetFlags]; default: break; } @@ -12951,7 +12960,7 @@ MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 5 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -12963,6 +12972,7 @@ unsigned DstReg, SrcReg; unsigned MemOpndSlot; + unsigned TargetFlags; unsigned CurOp = 0; @@ -12970,6 +12980,7 @@ MemOpndSlot = CurOp; CurOp += X86::AddrNumOperands; SrcReg = MI->getOperand(CurOp++).getReg(); + TargetFlags = MI->getOperand(CurOp++).getImm(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); MVT::SimpleValueType VT = *RC->vt_begin(); @@ -12979,7 +12990,7 @@ unsigned t4 = MRI.createVirtualRegister(RC); unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT); - unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); + unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT, TargetFlags); unsigned LOADOpc = getLoadOpcode(VT); // For the atomic load-arith operator, we generate @@ -13224,7 +13235,7 @@ MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 8 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -13237,6 +13248,7 @@ unsigned DstLoReg, DstHiReg; unsigned SrcLoReg, SrcHiReg; unsigned MemOpndSlot; + unsigned TargetFlags; unsigned CurOp = 0; @@ -13246,6 +13258,15 @@ CurOp += X86::AddrNumOperands; SrcLoReg = MI->getOperand(CurOp++).getReg(); SrcHiReg = MI->getOperand(CurOp++).getReg(); + TargetFlags = MI->getOperand(CurOp++).getImm(); + + assert(!(TargetFlags && !Subtarget->hasHLE()) && + "'targetflags' is specified while HLE is disabled."); + + assert(((TargetFlags & 3) != 3) && "unknown 'targetflags'"); + + bool IsXAcq = TargetFlags & 1; + bool IsXRel = TargetFlags & 2; const TargetRegisterClass *RC = &X86::GR32RegClass; const TargetRegisterClass *RC8 = &X86::GR8RegClass; @@ -13259,7 +13280,8 @@ unsigned t4L = MRI.createVirtualRegister(RC); unsigned t4H = MRI.createVirtualRegister(RC); - unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; + unsigned LCMPXCHGOpc = IsXAcq ? X86::LCMPXCHG8BACQ : + IsXRel ? 
X86::LCMPXCHG8BREL : X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; // For the atomic load-arith operator, we generate Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -516,32 +516,38 @@ let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { let Defs = [EFLAGS, AL] in def NAME#8 : I<0, Pseudo, (outs GR8:$dst), - (ins i8mem:$ptr, GR8:$val), + (ins i8mem:$ptr, GR8:$val, i8imm:$flags), !strconcat(mnemonic, "8 PSEUDO!"), []>; let Defs = [EFLAGS, AX] in def NAME#16 : I<0, Pseudo,(outs GR16:$dst), - (ins i16mem:$ptr, GR16:$val), + (ins i16mem:$ptr, GR16:$val, i8imm:$flags), !strconcat(mnemonic, "16 PSEUDO!"), []>; let Defs = [EFLAGS, EAX] in def NAME#32 : I<0, Pseudo, (outs GR32:$dst), - (ins i32mem:$ptr, GR32:$val), + (ins i32mem:$ptr, GR32:$val, i8imm:$flags), !strconcat(mnemonic, "32 PSEUDO!"), []>; let Defs = [EFLAGS, RAX] in def NAME#64 : I<0, Pseudo, (outs GR64:$dst), - (ins i64mem:$ptr, GR64:$val), + (ins i64mem:$ptr, GR64:$val, i8imm:$flags), !strconcat(mnemonic, "64 PSEUDO!"), []>; } } -multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS { +multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS_WITH_FLAG { def : Pat<(!cast(frag # "_8") addr:$ptr, GR8:$val), - (!cast(name # "8") addr:$ptr, GR8:$val)>; + (!cast(name # "8") addr:$ptr, GR8:$val, flag)>; def : Pat<(!cast(frag # "_16") addr:$ptr, GR16:$val), - (!cast(name # "16") addr:$ptr, GR16:$val)>; + (!cast(name # "16") addr:$ptr, GR16:$val, flag)>; def : Pat<(!cast(frag # "_32") addr:$ptr, GR32:$val), - (!cast(name # "32") addr:$ptr, GR32:$val)>; + (!cast(name # "32") addr:$ptr, GR32:$val, flag)>; def : Pat<(!cast(frag # "_64") addr:$ptr, GR64:$val), - (!cast(name # "64") addr:$ptr, GR64:$val)>; + (!cast(name # "64") addr:$ptr, GR64:$val, flag)>; +} + +multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS { + defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS_WITH_FLAG; + defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS_WITH_FLAG; + defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS_WITH_FLAG; } // Atomic exchange, and, or, xor @@ -567,7 +573,7 @@ let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], mayLoad = 1, mayStore = 1, hasSideEffects = 0 in def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2, i8imm:$flags), !strconcat(mnemonic, "6432 PSEUDO!"), []>; } @@ -686,11 +692,25 @@ } -defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; -defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; -defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; -defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; -defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; +defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; +defm LOCK_ADDACQ : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">, XACQ; +defm LOCK_ADDREL : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">, XREL; + +defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; +defm LOCK_SUBACQ : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">, XACQ; +defm LOCK_SUBREL : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">, XREL; + +defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; +defm LOCK_ORACQ : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">, XACQ; +defm LOCK_ORREL : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">, XREL; + +defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; +defm LOCK_ANDACQ : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">, 
XACQ; +defm LOCK_ANDREL : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">, XREL; + +defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; +defm LOCK_XORACQ : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">, XACQ; +defm LOCK_XORREL : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">, XREL; // Optimized codegen when the non-memory output is not used. multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, @@ -713,7 +733,12 @@ } defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">; +defm LOCK_INCACQ : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">, XACQ; +defm LOCK_INCREL : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">, XREL; + defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">; +defm LOCK_DECACQ : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">, XACQ; +defm LOCK_DECREL : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">, XREL; // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, @@ -750,20 +775,39 @@ } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", - X86cas8, i64mem, - IIC_CMPX_LOCK_8B>; +defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", + X86cas8_none, i64mem, + IIC_CMPX_LOCK_8B>; +defm LCMPXCHG8BACQ : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", + X86cas8_xacq, i64mem, + IIC_CMPX_LOCK_8B>, XACQ; +defm LCMPXCHG8BREL : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", + X86cas8_xrel, i64mem, + IIC_CMPX_LOCK_8B>, XREL; } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], Predicates = [HasCmpxchg16b] in { -defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", - X86cas16, i128mem, - IIC_CMPX_LOCK_16B>, REX_W; +defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16_none, i128mem, + IIC_CMPX_LOCK_16B>, REX_W; +defm LCMPXCHG16BACQ : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16_xrel, i128mem, + IIC_CMPX_LOCK_16B>, REX_W, XACQ; +defm LCMPXCHG16BREL : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16_xrel, i128mem, + IIC_CMPX_LOCK_16B>, REX_W, XREL; } -defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", - X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>; +defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", + X86cas_none, IIC_CMPX_LOCK_8, + IIC_CMPX_LOCK>; +defm LCMPXCHGACQ : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", + X86cas_xacq, IIC_CMPX_LOCK_8, + IIC_CMPX_LOCK>, XACQ; +defm LCMPXCHGREL : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", + X86cas_xrel, IIC_CMPX_LOCK_8, + IIC_CMPX_LOCK>, XREL; // Atomic exchange and add multiclass ATOMIC_LOAD_BINOP opc8, bits<8> opc, string mnemonic, @@ -800,9 +844,15 @@ } } -defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", - IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, - TB, LOCK; +defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add_none", + IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, + TB, LOCK; +defm LXADDACQ : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add_xacq", + IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, + TB, LOCK, XACQ; +defm LXADDREL : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add_xrel", + IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, + TB, LOCK, XREL; def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), "#ACQUIRE_MOV PSEUDO!", @@ -819,16 +869,36 @@ def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), "#RELEASE_MOV PSEUDO!", - [(atomic_store_8 addr:$dst, GR8 :$src)]>; + [(atomic_store_none_8 addr:$dst, GR8 :$src)]>; def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), "#RELEASE_MOV PSEUDO!", - 
[(atomic_store_16 addr:$dst, GR16:$src)]>; + [(atomic_store_none_16 addr:$dst, GR16:$src)]>; def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), "#RELEASE_MOV PSEUDO!", - [(atomic_store_32 addr:$dst, GR32:$src)]>; + [(atomic_store_none_32 addr:$dst, GR32:$src)]>; def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), "#RELEASE_MOV PSEUDO!", - [(atomic_store_64 addr:$dst, GR64:$src)]>; + [(atomic_store_none_64 addr:$dst, GR64:$src)]>; + +multiclass ATOMIC_STORE opc8, bits<8> opc, string mnemonic, string frag> { + let isCodeGenOnly = 1 in { + def NAME#8mr : I(frag # "_8") addr:$dst, GR8:$src)]>; + def NAME#16mr : I(frag # "_16") addr:$dst, GR16:$src)]>, + OpSize; + def NAME#32mr : I(frag # "_32") addr:$dst, GR32:$src)]>; + def NAME#64mr : RI(frag # "_64") addr:$dst, GR64:$src)]>; + } +} + +defm XRELEASE_MOV : ATOMIC_STORE<0x88, 0x89, "mov", "atomic_store_xrel">, XREL; //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions. Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -754,6 +754,156 @@ return N->hasOneUse(); }]>; +// Helper frag for atomic with target flags. +class ATOMIC_NONE : PatFrag(N)->getHLEHint(); + return !Subtarget->hasHLE() || (Hint == HLENone); +}]>; + +class ATOMIC_XACQ : PatFrag(N)->getHLEHint(); + return Subtarget->hasHLE() && (Hint == HLEAcquire); +}]>; + +class ATOMIC_XREL : PatFrag(N)->getHLEHint(); + return Subtarget->hasHLE() && (Hint == HLERelease); +}]>; + +class MEMINTRINSIC_NONE : PatFrag(N)->getHLEHint(); + return !Subtarget->hasHLE() || (Hint == HLENone); +}]>; + +class MEMINTRINSIC_XACQ : PatFrag(N)->getHLEHint(); + return Subtarget->hasHLE() && (Hint == HLEAcquire); +}]>; + +class MEMINTRINSIC_XREL : PatFrag(N)->getHLEHint(); + return Subtarget->hasHLE() && (Hint == HLERelease); +}]>; + +multiclass atomic_unop { + def _none_8 : ATOMIC_NONE<(ops node:$ptr), + (!cast(frag # "_8") node:$ptr)>; + def _none_16 : ATOMIC_NONE<(ops node:$ptr), + (!cast(frag # "_16") node:$ptr)>; + def _none_32 : ATOMIC_NONE<(ops node:$ptr), + (!cast(frag # "_32") node:$ptr)>; + def _none_64 : ATOMIC_NONE<(ops node:$ptr), + (!cast(frag # "_64") node:$ptr)>; + def _xacq_8 : ATOMIC_XACQ<(ops node:$ptr), + (!cast(frag # "_8") node:$ptr)>; + def _xacq_16 : ATOMIC_XACQ<(ops node:$ptr), + (!cast(frag # "_16") node:$ptr)>; + def _xacq_32 : ATOMIC_XACQ<(ops node:$ptr), + (!cast(frag # "_32") node:$ptr)>; + def _xacq_64 : ATOMIC_XACQ<(ops node:$ptr), + (!cast(frag # "_64") node:$ptr)>; + def _xrel_8 : ATOMIC_XREL<(ops node:$ptr), + (!cast(frag # "_8") node:$ptr)>; + def _xrel_16 : ATOMIC_XREL<(ops node:$ptr), + (!cast(frag # "_16") node:$ptr)>; + def _xrel_32 : ATOMIC_XREL<(ops node:$ptr), + (!cast(frag # "_32") node:$ptr)>; + def _xrel_64 : ATOMIC_XREL<(ops node:$ptr), + (!cast(frag # "_64") node:$ptr)>; +} + +multiclass atomic_binop { + def _none_8 : ATOMIC_NONE<(ops node:$ptr, node:$val), + (!cast(frag # "_8") node:$ptr, node:$val)>; + def _none_16 : ATOMIC_NONE<(ops node:$ptr, node:$val), + (!cast(frag # "_16") node:$ptr, node:$val)>; + def _none_32 : ATOMIC_NONE<(ops node:$ptr, node:$val), + (!cast(frag # "_32") node:$ptr, node:$val)>; + def _none_64 : ATOMIC_NONE<(ops node:$ptr, node:$val), + (!cast(frag # "_64") node:$ptr, node:$val)>; + def _xacq_8 : ATOMIC_XACQ<(ops node:$ptr, node:$val), + (!cast(frag # "_8") node:$ptr, node:$val)>; + def _xacq_16 : 
ATOMIC_XACQ<(ops node:$ptr, node:$val), + (!cast(frag # "_16") node:$ptr, node:$val)>; + def _xacq_32 : ATOMIC_XACQ<(ops node:$ptr, node:$val), + (!cast(frag # "_32") node:$ptr, node:$val)>; + def _xacq_64 : ATOMIC_XACQ<(ops node:$ptr, node:$val), + (!cast(frag # "_64") node:$ptr, node:$val)>; + def _xrel_8 : ATOMIC_XREL<(ops node:$ptr, node:$val), + (!cast(frag # "_8") node:$ptr, node:$val)>; + def _xrel_16 : ATOMIC_XREL<(ops node:$ptr, node:$val), + (!cast(frag # "_16") node:$ptr, node:$val)>; + def _xrel_32 : ATOMIC_XREL<(ops node:$ptr, node:$val), + (!cast(frag # "_32") node:$ptr, node:$val)>; + def _xrel_64 : ATOMIC_XREL<(ops node:$ptr, node:$val), + (!cast(frag # "_64") node:$ptr, node:$val)>; +} + +multiclass atomic_ternop { + def _none_8 : ATOMIC_NONE<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_8") node:$ptr, node:$cmp, node:$swap)>; + def _none_16 : ATOMIC_NONE<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_16") node:$ptr, node:$cmp, node:$swap)>; + def _none_32 : ATOMIC_NONE<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_32") node:$ptr, node:$cmp, node:$swap)>; + def _none_64 : ATOMIC_NONE<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_64") node:$ptr, node:$cmp, node:$swap)>; + def _xacq_8 : ATOMIC_XACQ<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_8") node:$ptr, node:$cmp, node:$swap)>; + def _xacq_16 : ATOMIC_XACQ<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_16") node:$ptr, node:$cmp, node:$swap)>; + def _xacq_32 : ATOMIC_XACQ<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_32") node:$ptr, node:$cmp, node:$swap)>; + def _xacq_64 : ATOMIC_XACQ<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_64") node:$ptr, node:$cmp, node:$swap)>; + def _xrel_8 : ATOMIC_XREL<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_8") node:$ptr, node:$cmp, node:$swap)>; + def _xrel_16 : ATOMIC_XREL<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_16") node:$ptr, node:$cmp, node:$swap)>; + def _xrel_32 : ATOMIC_XREL<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_32") node:$ptr, node:$cmp, node:$swap)>; + def _xrel_64 : ATOMIC_XREL<(ops node:$ptr, node:$cmp, node:$swap), + (!cast(frag # "_64") node:$ptr, node:$cmp, node:$swap)>; +} + +// FIXME: some primitives doesn't support XACQUIRE or XRELEASE: e.g. 
+// 'load' cannot be used with neither XACQUIRE or XRELEASE; +// 'store' can only be used with XRELEASE; + +defm atomic_cmp_swap : atomic_ternop<"atomic_cmp_swap">; +defm atomic_load_add : atomic_binop<"atomic_load_add">; +defm atomic_swap : atomic_binop<"atomic_swap">; +defm atomic_load_sub : atomic_binop<"atomic_load_sub">; +defm atomic_load_and : atomic_binop<"atomic_load_and">; +defm atomic_load_or : atomic_binop<"atomic_load_or">; +defm atomic_load_xor : atomic_binop<"atomic_load_xor">; +defm atomic_load_nand : atomic_binop<"atomic_load_nand">; +defm atomic_load_min : atomic_binop<"atomic_load_min">; +defm atomic_load_max : atomic_binop<"atomic_load_max">; +defm atomic_load_umin : atomic_binop<"atomic_load_umin">; +defm atomic_load_umax : atomic_binop<"atomic_load_umax">; +defm atomic_store : atomic_binop<"atomic_store">; +defm atomic_load : atomic_unop<"atomic_load">; + +multiclass memintrinsic_unop { + def _none : MEMINTRINSIC_NONE<(ops node:$ptr), (opnode node:$ptr)>; + def _xacq : MEMINTRINSIC_XACQ<(ops node:$ptr), (opnode node:$ptr)>; + def _xrel : MEMINTRINSIC_XREL<(ops node:$ptr), (opnode node:$ptr)>; +} + +multiclass memintrinsic_ternop { + def _none : MEMINTRINSIC_NONE<(ops node:$ptr, node:$val, node:$imm), + (opnode node:$ptr, node:$val, node:$imm)>; + def _xacq : MEMINTRINSIC_XACQ<(ops node:$ptr, node:$val, node:$imm), + (opnode node:$ptr, node:$val, node:$imm)>; + def _xrel : MEMINTRINSIC_XREL<(ops node:$ptr, node:$val, node:$imm), + (opnode node:$ptr, node:$val, node:$imm)>; +} + +defm X86cas : memintrinsic_ternop; +defm X86cas8 : memintrinsic_unop; +defm X86cas16 : memintrinsic_unop; + //===----------------------------------------------------------------------===// // Instruction list. // @@ -1378,7 +1528,11 @@ } } -defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>; +defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap_none", IIC_XCHG_MEM>; +let isCodeGenOnly = 1 in { +defm XCHGACQ : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap_xacq", IIC_XCHG_MEM>, XACQ; +defm XCHGREL : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap_xrel", IIC_XCHG_MEM>, XREL; +} // Swap between registers. 
let Constraints = "$val = $dst" in { Index: test/CodeGen/X86/hle-atomic16.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/hle-atomic16.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=+hle -verify-machineinstrs | FileCheck %s --check-prefix X64HLE +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=-hle -verify-machineinstrs | FileCheck %s --check-prefix X64NOHLE + +@sc16 = external global i16 + +; 16-bit + +define void @atomic_fetch_add16() nounwind { +; X64HLE: atomic_fetch_add16 +; X64NOHLE: atomic_fetch_add16 + %t0 = atomicrmw add i16* @sc16, i16 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: incw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: incw + %t1 = atomicrmw add i16* @sc16, i16 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: xaddw +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: xaddw + %t2 = atomicrmw add i16* @sc16, i16 %t1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: addw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: addw + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_sub16() nounwind { +; X64HLE: atomic_fetch_sub16 +; X64NOHLE: atomic_fetch_sub16 + %t3 = atomicrmw sub i16* @sc16, i16 1 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: decw +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: decw + %t4 = atomicrmw sub i16* @sc16, i16 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xaddw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xaddw + %t5 = atomicrmw sub i16* @sc16, i16 %t4 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: subw +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: subw + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_logic16() nounwind { +; X64HLE: atomic_fetch_logic16 +; X64NOHLE: atomic_fetch_logic16 + %t6 = atomicrmw and i16* @sc16, i16 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: andw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: andw + %t7 = atomicrmw or i16* @sc16, i16 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: orw +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: orw + %t8 = atomicrmw xor i16* @sc16, i16 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xorw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xorw + %t9 = atomicrmw nand i16* @sc16, i16 5 acquire, !hle.lock !1 +; X64HLE: andw +; X64HLE: notw +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgw +; X64NOHLE: andw +; X64NOHLE: notw +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgw + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_minmax16() nounwind { +; X64HLE: atomic_fetch_minmax16 +; X64NOHLE: atomic_fetch_minmax16 + %t0 = atomicrmw max i16* @sc16, i16 5 acquire, !hle.lock !0 +; X64HLE: cmpw +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgw +; X64NOHLE: cmpw +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgw + %t1 = atomicrmw min i16* @sc16, i16 5 acquire, !hle.lock !1 +; X64HLE: cmpw +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgw +; X64NOHLE: cmpw +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgw + %t2 = atomicrmw umax i16* @sc16, i16 5 acquire, 
!hle.lock !0 +; X64HLE: cmpw +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgw +; X64NOHLE: cmpw +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgw + %t3 = atomicrmw umin i16* @sc16, i16 5 acquire, !hle.lock !1 +; X64HLE: cmpw +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgw +; X64NOHLE: cmpw +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgw + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_misc16() nounwind { +; X64HLE: atomic_fetch_misc16 +; X64NOHLE: atomic_fetch_misc16 + %t4 = cmpxchg i16* @sc16, i16 0, i16 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgw +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgw + store atomic i16 0, i16* @sc16 release, align 2, !hle.lock !1 +; X64HLE-NOT: lock +; X64HLE: xrelease +; X64HLE: movw +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: movw + %t5 = atomicrmw xchg i16* @sc16, i16 %t4 acquire, !hle.lock !0 +; X64HLE-NOT: lock +; X64HLE: xacquire +; X64HLE: xchgw +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xchgw + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +!0 = metadata !{metadata !"acquire"} +!1 = metadata !{metadata !"release"} Index: test/CodeGen/X86/hle-atomic32.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/hle-atomic32.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=+hle -verify-machineinstrs | FileCheck %s --check-prefix X64HLE +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=-hle -verify-machineinstrs | FileCheck %s --check-prefix X64NOHLE + +@sc32 = external global i32 + +; 32-bit + +define void @atomic_fetch_add32() nounwind { +; X64HLE: atomic_fetch_add32 +; X64NOHLE: atomic_fetch_add32 + %t0 = atomicrmw add i32* @sc32, i32 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: incl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: incl + %t1 = atomicrmw add i32* @sc32, i32 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: xaddl +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: xaddl + %t2 = atomicrmw add i32* @sc32, i32%t1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: addl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: addl + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_sub32() nounwind { +; X64HLE: atomic_fetch_sub32 +; X64NOHLE: atomic_fetch_sub32 + %t0 = atomicrmw sub i32* @sc32, i32 1 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: decl +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: decl + %t1 = atomicrmw sub i32* @sc32, i32 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xaddl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xaddl + %t2 = atomicrmw sub i32* @sc32, i32%t1 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: subl +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: subl + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_logic32() nounwind { +; X64HLE: atomic_fetch_logic32 +; X64NOHLE: atomic_fetch_logic32 + %t0 = atomicrmw and i32* @sc32, i32 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: andl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: andl + %t1 = atomicrmw or i32* @sc32, i32 5 acquire, !hle.lock !1 +; X64HLE: lock +; 
X64HLE-NEXT: xrelease +; X64HLE: orl +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: orl + %t2 = atomicrmw xor i32* @sc32, i32 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xorl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xorl + %t3 = atomicrmw nand i32* @sc32, i32 5 acquire, !hle.lock !1 +; X64HLE: andl +; X64HLE: notl +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgl +; X64NOHLE: andl +; X64NOHLE: notl +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgl + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_minmax32() nounwind { +; X64HLE: atomic_fetch_minmax32 +; X64NOHLE: atomic_fetch_minmax32 + %t0 = atomicrmw max i32* @sc32, i32 5 acquire, !hle.lock !0 +; X64HLE: cmpl +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgl +; X64NOHLE: cmpl +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgl + %t1 = atomicrmw min i32* @sc32, i32 5 acquire, !hle.lock !1 +; X64HLE: cmpl +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgl +; X64NOHLE: cmpl +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgl + %t2 = atomicrmw umax i32* @sc32, i32 5 acquire, !hle.lock !0 +; X64HLE: cmpl +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgl +; X64NOHLE: cmpl +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgl + %t3 = atomicrmw umin i32* @sc32, i32 5 acquire, !hle.lock !1 +; X64HLE: cmpl +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgl +; X64NOHLE: cmpl +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgl + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_misc32() nounwind { +; X64HLE: atomic_fetch_misc32 +; X64NOHLE: atomic_fetch_misc32 + %t0 = cmpxchg i32* @sc32, i32 0, i32 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgl +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgl + store atomic i32 0, i32* @sc32 release, align 4, !hle.lock !1 +; X64HLE-NOT: lock +; X64HLE: xrelease +; X64HLE: movl +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: movl + %t1 = atomicrmw xchg i32* @sc32, i32 %t0 acquire, !hle.lock !0 +; X64HLE-NOT: lock +; X64HLE: xacquire +; X64HLE: xchgl +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xchgl + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +!0 = metadata !{metadata !"acquire"} +!1 = metadata !{metadata !"release"} Index: test/CodeGen/X86/hle-atomic64.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/hle-atomic64.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=+hle -verify-machineinstrs | FileCheck %s --check-prefix X64HLE +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=-hle -verify-machineinstrs | FileCheck %s --check-prefix X64NOHLE + +@sc64 = external global i64 + +; 64-bit + +define void @atomic_fetch_add64() nounwind { +; X64HLE: atomic_fetch_add64 +; X64NOHLE: atomic_fetch_add64 + %t0 = atomicrmw add i64* @sc64, i64 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: incq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: incq + %t1 = atomicrmw add i64* @sc64, i64 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: xaddq +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: xaddq + %t2 = atomicrmw add i64* @sc64, i64 
%t1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: addq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: addq + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_sub64() nounwind { +; X64HLE: atomic_fetch_sub64 +; X64NOHLE: atomic_fetch_sub64 + %t3 = atomicrmw sub i64* @sc64, i64 1 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: decq +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: decq + %t4 = atomicrmw sub i64* @sc64, i64 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xaddq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xaddq + %t5 = atomicrmw sub i64* @sc64, i64 %t4 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: subq +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: subq + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_logic64() nounwind { +; X64HLE: atomic_fetch_logic64 +; X64NOHLE: atomic_fetch_logic64 + %t6 = atomicrmw and i64* @sc64, i64 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: andq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: andq + %t7 = atomicrmw or i64* @sc64, i64 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: orq +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: orq + %t8 = atomicrmw xor i64* @sc64, i64 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xorq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xorq + %t9 = atomicrmw nand i64* @sc64, i64 5 acquire, !hle.lock !1 +; X64HLE: andq +; X64HLE: notq +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgq +; X64NOHLE: andq +; X64NOHLE: notq +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgq + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_minmax64() nounwind { +; X64HLE: atomic_fetch_minmax64 +; X64NOHLE: atomic_fetch_minmax64 + %t0 = atomicrmw max i64* @sc64, i64 5 acquire, !hle.lock !0 +; X64HLE: cmpq +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgq +; X64NOHLE: cmpq +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgq + %t1 = atomicrmw min i64* @sc64, i64 5 acquire, !hle.lock !1 +; X64HLE: cmpq +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgq +; X64NOHLE: cmpq +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgq + %t2 = atomicrmw umax i64* @sc64, i64 5 acquire, !hle.lock !0 +; X64HLE: cmpq +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgq +; X64NOHLE: cmpq +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgq + %t3 = atomicrmw umin i64* @sc64, i64 5 acquire, !hle.lock !1 +; X64HLE: cmpq +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgq +; X64NOHLE: cmpq +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgq + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_misc64() nounwind { +; X64HLE: atomic_fetch_misc64 +; X64NOHLE: atomic_fetch_misc64 + %t4 = cmpxchg i64* @sc64, i64 0, i64 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgq +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgq + store atomic i64 0, i64* @sc64 release, align 8, !hle.lock !1 +; X64HLE-NOT: lock +; X64HLE: xrelease +; X64HLE: movq +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: movq + %t5 = 
atomicrmw xchg i64* @sc64, i64 %t4 acquire, !hle.lock !0 +; X64HLE-NOT: lock +; X64HLE: xacquire +; X64HLE: xchgq +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xchgq + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +!0 = metadata !{metadata !"acquire"} +!1 = metadata !{metadata !"release"} Index: test/CodeGen/X86/hle-atomic8.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/hle-atomic8.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=+hle -verify-machineinstrs | FileCheck %s --check-prefix X64HLE +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -mattr=-hle -verify-machineinstrs | FileCheck %s --check-prefix X64NOHLE + +@sc8 = external global i8 + +; 8-bit + +define void @atomic_fetch_add8() nounwind { +; X64HLE: atomic_fetch_add8 +; X64NOHLE: atomic_fetch_add8 + %t0 = atomicrmw add i8* @sc8, i8 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: incb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: incb + %t1 = atomicrmw add i8* @sc8, i8 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: xaddb +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: xaddb + %t2 = atomicrmw add i8* @sc8, i8 %t1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: addb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: addb + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_sub8() nounwind { +; X64HLE: atomic_fetch_sub8 +; X64NOHLE: atomic_fetch_sub8 + %t3 = atomicrmw sub i8* @sc8, i8 1 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: decb +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: decb + %t4 = atomicrmw sub i8* @sc8, i8 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xaddb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xaddb + %t5 = atomicrmw sub i8* @sc8, i8 %t4 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: subb +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: subb + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_logic8() nounwind { +; X64HLE: atomic_fetch_logic8 +; X64NOHLE: atomic_fetch_logic8 + %t6 = atomicrmw and i8* @sc8, i8 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: andb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: andb + %t7 = atomicrmw or i8* @sc8, i8 5 acquire, !hle.lock !1 +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: orb +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: orb + %t8 = atomicrmw xor i8* @sc8, i8 5 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: xorb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xorb + %t9 = atomicrmw nand i8* @sc8, i8 5 acquire, !hle.lock !1 +; X64HLE: andb +; X64HLE: notb +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgb +; X64NOHLE: andb +; X64NOHLE: notb +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgb + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_minmax8() nounwind { +; X64HLE: atomic_fetch_minmax8 +; X64NOHLE: atomic_fetch_minmax8 + %t0 = atomicrmw max i8* @sc8, i8 5 acquire, !hle.lock !0 +; X64HLE: cmpb +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgb +; X64NOHLE: cmpb +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgb + %t1 = atomicrmw min i8* @sc8, i8 5 acquire, !hle.lock !1 +; X64HLE: cmpb +; X64HLE: cmov +; 
X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgb +; X64NOHLE: cmpb +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgb + %t2 = atomicrmw umax i8* @sc8, i8 5 acquire, !hle.lock !0 +; X64HLE: cmpb +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgb +; X64NOHLE: cmpb +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgb + %t3 = atomicrmw umin i8* @sc8, i8 5 acquire, !hle.lock !1 +; X64HLE: cmpb +; X64HLE: cmov +; X64HLE: lock +; X64HLE-NEXT: xrelease +; X64HLE: cmpxchgb +; X64NOHLE: cmpb +; X64NOHLE: cmov +; X64NOHLE: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: cmpxchgb + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +define void @atomic_fetch_misc8() nounwind { +; X64HLE: atomic_fetch_misc8 +; X64NOHLE: atomic_fetch_misc8 + %t4 = cmpxchg i8* @sc8, i8 0, i8 1 acquire, !hle.lock !0 +; X64HLE: lock +; X64HLE-NEXT: xacquire +; X64HLE: cmpxchgb +; X64NOHLE: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: cmpxchgb + store atomic i8 0, i8* @sc8 release, align 1, !hle.lock !1 +; X64HLE-NOT: lock +; X64HLE: xrelease +; X64HLE: movb +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xrelease +; X64NOHLE: movb + %t5 = atomicrmw xchg i8* @sc8, i8 %t4 acquire, !hle.lock !0 +; X64HLE-NOT: lock +; X64HLE: xacquire +; X64HLE: xchgb +; X64NOHLE-NOT: lock +; X64NOHLE-NOT: xacquire +; X64NOHLE: xchgb + ret void +; X64HLE: ret +; X64NOHLE: ret +} + +!0 = metadata !{metadata !"acquire"} +!1 = metadata !{metadata !"release"}
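
Note (illustrative, not part of the patch): a minimal standalone C++ sketch of the hint-to-flag mapping that the patch repeats inline in SelectAtomicLoadArith and the custom inserters. The HLEHint and AtomicTargetFlags enumerators mirror the names introduced above; the free function getAtomicTargetFlag is a hypothetical helper used only to show the intended mapping.

// Sketch only -- mirrors the switch in SelectAtomicLoadArith above.
enum HLEHint { HLENone, HLEAcquire, HLERelease };
enum AtomicTargetFlags { TargetFlagNone, TargetFlagXAcquire, TargetFlagXRelease };

// Map a per-instruction HLE hint to the third index of AtomicOpcTbl
// (equivalently, to the i8 'targetflags' immediate on the pseudos).
// Without HLE support everything degrades to the plain LOCK-prefixed form.
static AtomicTargetFlags getAtomicTargetFlag(bool HasHLE, HLEHint Hint) {
  if (!HasHLE)
    return TargetFlagNone;
  switch (Hint) {
  case HLEAcquire: return TargetFlagXAcquire;  // select the *ACQ opcode (XACQUIRE prefix)
  case HLERelease: return TargetFlagXRelease;  // select the *REL opcode (XRELEASE prefix)
  default:         return TargetFlagNone;      // no elision hint attached
  }
}

With HLE enabled, the returned flag picks the XACQ/XREL column of AtomicOpcTbl (for example, AtomicOpcTbl[ADD][I32][TargetFlagXAcquire] yields LOCK_ADDACQ32mr); otherwise index 0 keeps today's LOCK_* opcodes, matching the X64NOHLE checks in the tests.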