Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -14,6 +14,7 @@
 set(sources
   X86AsmPrinter.cpp
+  X86AtomicExpandPass.cpp
   X86CodeEmitter.cpp
   X86FastISel.cpp
   X86FloatingPoint.cpp
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -24,6 +24,10 @@
 class JITCodeEmitter;
 class X86TargetMachine;
 
+/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
+/// be handled natively in terms of a loop using cmpxchg.
+FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
+
 /// createX86ISelDag - This pass converts a legalized DAG into a
 /// X86-specific DAG, ready for instruction scheduling.
 ///
Index: lib/Target/X86/X86AtomicExpandPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86AtomicExpandPass.cpp
@@ -0,0 +1,269 @@
+//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions which
+// cannot be implemented as a single instruction with cmpxchg-based loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-expand"
+
+namespace {
+  class X86AtomicExpandPass : public FunctionPass {
+    const X86TargetMachine *TM;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit X86AtomicExpandPass(const X86TargetMachine *TM)
+      : FunctionPass(ID), TM(TM) {}
+
+    bool runOnFunction(Function &F) override;
+    bool expandAtomicInsts(Function &F);
+
+    bool needsCmpXchgNb(Type *MemType);
+    bool shouldExpandStore(StoreInst *SI);
+    bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
+    bool shouldExpand(Instruction *Inst);
+
+    bool expandAtomicRMW(AtomicRMWInst *AI);
+    bool expandAtomicStore(StoreInst *SI);
+  };
+}
+
+char X86AtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
+  return new X86AtomicExpandPass(TM);
+}
+
+bool X86AtomicExpandPass::runOnFunction(Function &F) {
+  SmallVector<Instruction *, 16> AtomicInsts;
+
+  // Changing control-flow while iterating through it is a bad idea, so gather a
+  // list of all atomic instructions before we start.
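+  // Only `atomicrmw` and atomic `store` instructions are collected here;
+  // atomic loads and cmpxchg are not touched by this pass and remain
+  // handled during DAG lowering.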
+  for (BasicBlock &BB : F)
+    for (Instruction &Inst : BB) {
+      if (isa<AtomicRMWInst>(&Inst) ||
+          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+        AtomicInsts.push_back(&Inst);
+    }
+
+  bool MadeChange = false;
+  for (Instruction *Inst : AtomicInsts) {
+    if (!shouldExpand(Inst))
+      continue;
+
+    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+      MadeChange |= expandAtomicRMW(AI);
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+      MadeChange |= expandAtomicStore(SI);
+  }
+
+  return MadeChange;
+}
+
+/// Returns true if operations on the given type will need to use either
+/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
+/// native width, and the instructions are available (otherwise we leave them
+/// alone to become __sync_fetch_and_... calls).
+bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
+  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+  if (!Subtarget.hasCmpxchg16b())
+    return false;
+
+  unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
+
+  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+  if (OpWidth == CmpXchgNbWidth)
+    return true;
+
+  return false;
+}
+
+
+bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
+  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+
+  if (needsCmpXchgNb(AI->getType()))
+    return true;
+  else if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
+    return false;
+
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+  switch (Op) {
+  default:
+    llvm_unreachable("Unknown atomic operation");
+  case AtomicRMWInst::Xchg:
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+    // It's better to use xadd, xsub or xchg for these in all cases.
+    return false;
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Xor:
+    // If the atomicrmw's result isn't actually used, we can just add a "lock"
+    // prefix to a normal instruction for these operations.
+    return !AI->use_empty();
+  case AtomicRMWInst::Nand:
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin:
+    // These always require a non-trivial set of data operations on x86. We must
+    // use a cmpxchg loop.
+    return true;
+  }
+}
+
+bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
+  if (needsCmpXchgNb(SI->getValueOperand()->getType()))
+    return true;
+
+  return false;
+}
+
+bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
+  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+    return shouldExpandAtomicRMW(AI);
+  else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return shouldExpandStore(SI);
+  return false;
+}
+
+
+bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+  AtomicOrdering Order =
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+  Value *Addr = AI->getPointerOperand();
+  BasicBlock *BB = AI->getParent();
+  Function *F = BB->getParent();
+  LLVMContext &Ctx = F->getContext();
+
+  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+  //
+  // The standard expansion we produce is:
+  //     [...]
+  //     %init_loaded = load atomic iN* %addr
+  //     br label %loop
+  // loop:
+  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+  //     %new = some_op iN %loaded, %incr
+  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
+  //     %success = extractvalue { iN, i1 } %pair, 1
+  //     br i1 %success, label %atomicrmw.end, label %loop
+  // atomicrmw.end:
+  //     [...]
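+  //
+  // As a concrete, purely illustrative instance (value names below are made up
+  // for exposition), an "%old = atomicrmw nand i32* %p, i32 %v seq_cst"
+  // comes out roughly as:
+  //     %init_loaded = load i32* %p
+  //     br label %atomicrmw.start
+  //   atomicrmw.start:
+  //     %loaded = phi i32 [ %init_loaded, %entry ], [ %new_loaded, %atomicrmw.start ]
+  //     %tmp = and i32 %loaded, %v
+  //     %new = xor i32 %tmp, -1
+  //     %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
+  //     %new_loaded = extractvalue { i32, i1 } %pair, 0
+  //     %success = extractvalue { i32, i1 } %pair, 1
+  //     br i1 %success, label %atomicrmw.end, label %atomicrmw.start
+  //   atomicrmw.end: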
+  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+  // This grabs the DebugLoc from AI.
+  IRBuilder<> Builder(AI);
+
+  // The split call above "helpfully" added a branch at the end of BB (to the
+  // wrong place), but we want a load. It's easiest to just remove
+  // the branch entirely.
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+  Loaded->addIncoming(InitLoaded, BB);
+
+  Value *NewVal;
+  switch (AI->getOperation()) {
+  case AtomicRMWInst::Xchg:
+    NewVal = AI->getValOperand();
+    break;
+  case AtomicRMWInst::Add:
+    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::Sub:
+    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::And:
+    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::Nand:
+    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
+                               "new");
+    break;
+  case AtomicRMWInst::Or:
+    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::Xor:
+    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::Max:
+    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
+    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::Min:
+    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
+    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::UMax:
+    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
+    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+    break;
+  case AtomicRMWInst::UMin:
+    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
+    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+    break;
+  default:
+    llvm_unreachable("Unknown atomic op");
+  }
+
+  Value *Pair = Builder.CreateAtomicCmpXchg(
+      Addr, Loaded, NewVal, Order,
+      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+  Loaded->addIncoming(NewLoaded, LoopBB);
+
+  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+  Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+  AI->replaceAllUsesWith(NewLoaded);
+  AI->eraseFromParent();
+
+  return true;
+}
+
+bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+  // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
+  // this in terms of the usual expansion to "atomicrmw xchg".
+  IRBuilder<> Builder(SI);
+  AtomicRMWInst *AI =
+      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+                              SI->getValueOperand(), SI->getOrdering());
+  SI->eraseFromParent();
+
+  // Now we have an appropriate swap instruction, lower it as usual.
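+  // For example (illustrative only), on x86-64 with cmpxchg16b available,
+  //   store atomic i128 %v, i128* %p release, align 16
+  // is first rewritten to
+  //   %1 = atomicrmw xchg i128* %p, i128 %v release
+  // whose result is unused; shouldExpandAtomicRMW then routes that xchg
+  // through the cmpxchg loop built above, which is ultimately selected as a
+  // cmpxchg16b loop.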
+  if (shouldExpandAtomicRMW(AI))
+    return expandAtomicRMW(AI);
+
+  return true;
+}
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2126,38 +2126,6 @@
     return getGlobalBaseReg();
 
-  case X86ISD::ATOMOR64_DAG:
-  case X86ISD::ATOMXOR64_DAG:
-  case X86ISD::ATOMADD64_DAG:
-  case X86ISD::ATOMSUB64_DAG:
-  case X86ISD::ATOMNAND64_DAG:
-  case X86ISD::ATOMAND64_DAG:
-  case X86ISD::ATOMMAX64_DAG:
-  case X86ISD::ATOMMIN64_DAG:
-  case X86ISD::ATOMUMAX64_DAG:
-  case X86ISD::ATOMUMIN64_DAG:
-  case X86ISD::ATOMSWAP64_DAG: {
-    unsigned Opc;
-    switch (Opcode) {
-    default: llvm_unreachable("Impossible opcode");
-    case X86ISD::ATOMOR64_DAG:   Opc = X86::ATOMOR6432;   break;
-    case X86ISD::ATOMXOR64_DAG:  Opc = X86::ATOMXOR6432;  break;
-    case X86ISD::ATOMADD64_DAG:  Opc = X86::ATOMADD6432;  break;
-    case X86ISD::ATOMSUB64_DAG:  Opc = X86::ATOMSUB6432;  break;
-    case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
-    case X86ISD::ATOMAND64_DAG:  Opc = X86::ATOMAND6432;  break;
-    case X86ISD::ATOMMAX64_DAG:  Opc = X86::ATOMMAX6432;  break;
-    case X86ISD::ATOMMIN64_DAG:  Opc = X86::ATOMMIN6432;  break;
-    case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
-    case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
-    case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
-    }
-    SDNode *RetVal = SelectAtomic64(Node, Opc);
-    if (RetVal)
-      return RetVal;
-    break;
-  }
-
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_AND:
   case ISD::ATOMIC_LOAD_OR:
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -400,23 +400,8 @@
       // XTEST - Test if in transactional execution.
       XTEST,
 
-      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
-      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
-      // Atomic 64-bit binary operations.
-      ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
-      ATOMSUB64_DAG,
-      ATOMOR64_DAG,
-      ATOMXOR64_DAG,
-      ATOMAND64_DAG,
-      ATOMNAND64_DAG,
-      ATOMMAX64_DAG,
-      ATOMMIN64_DAG,
-      ATOMUMAX64_DAG,
-      ATOMUMIN64_DAG,
-      ATOMSWAP64_DAG,
-
       // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
- LCMPXCHG_DAG, + LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, LCMPXCHG16_DAG, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -585,21 +585,6 @@ setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } - if (!Subtarget->is64Bit()) { - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - } - if (Subtarget->hasCmpxchg16b()) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); } @@ -14835,29 +14820,6 @@ Results.push_back(Swap.getValue(2)); } -static void -ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl&Results, - SelectionDAG &DAG, unsigned NewOp) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - - SDValue Chain = Node->getOperand(0); - SDValue In1 = Node->getOperand(1); - SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0)); - SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1)); - SDValue Ops[] = { Chain, In1, In2L, In2H }; - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64, - cast(Node)->getMemOperand()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF)); - Results.push_back(Result.getValue(2)); -} - /// ReplaceNodeResults - Replace a node with an illegal result type /// with a new node built out of custom code. 
void X86TargetLowering::ReplaceNodeResults(SDNode *N, @@ -15003,57 +14965,6 @@ Results.push_back(EFLAGS.getValue(1)); return; } - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_SWAP: { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::ATOMIC_LOAD_ADD: - Opc = X86ISD::ATOMADD64_DAG; - break; - case ISD::ATOMIC_LOAD_AND: - Opc = X86ISD::ATOMAND64_DAG; - break; - case ISD::ATOMIC_LOAD_NAND: - Opc = X86ISD::ATOMNAND64_DAG; - break; - case ISD::ATOMIC_LOAD_OR: - Opc = X86ISD::ATOMOR64_DAG; - break; - case ISD::ATOMIC_LOAD_SUB: - Opc = X86ISD::ATOMSUB64_DAG; - break; - case ISD::ATOMIC_LOAD_XOR: - Opc = X86ISD::ATOMXOR64_DAG; - break; - case ISD::ATOMIC_LOAD_MAX: - Opc = X86ISD::ATOMMAX64_DAG; - break; - case ISD::ATOMIC_LOAD_MIN: - Opc = X86ISD::ATOMMIN64_DAG; - break; - case ISD::ATOMIC_LOAD_UMAX: - Opc = X86ISD::ATOMUMAX64_DAG; - break; - case ISD::ATOMIC_LOAD_UMIN: - Opc = X86ISD::ATOMUMIN64_DAG; - break; - case ISD::ATOMIC_SWAP: - Opc = X86ISD::ATOMSWAP64_DAG; - break; - } - ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); - return; - } case ISD::ATOMIC_LOAD: { ReplaceATOMIC_LOAD(N, Results, DAG); return; @@ -15160,12 +15071,6 @@ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG"; - case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; - case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; - case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; - case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; - case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; - case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VZEXT: return "X86ISD::VZEXT"; @@ -15554,685 +15459,6 @@ return sinkMBB; } -// Get CMPXCHG opcode for the specified data type. -static unsigned getCmpXChgOpcode(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::LCMPXCHG8; - case MVT::i16: return X86::LCMPXCHG16; - case MVT::i32: return X86::LCMPXCHG32; - case MVT::i64: return X86::LCMPXCHG64; - default: - break; - } - llvm_unreachable("Invalid operand size!"); -} - -// Get LOAD opcode for the specified data type. -static unsigned getLoadOpcode(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::MOV8rm; - case MVT::i16: return X86::MOV16rm; - case MVT::i32: return X86::MOV32rm; - case MVT::i64: return X86::MOV64rm; - default: - break; - } - llvm_unreachable("Invalid operand size!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction. 
-static unsigned getNonAtomicOpcode(unsigned Opc) { - switch (Opc) { - case X86::ATOMAND8: return X86::AND8rr; - case X86::ATOMAND16: return X86::AND16rr; - case X86::ATOMAND32: return X86::AND32rr; - case X86::ATOMAND64: return X86::AND64rr; - case X86::ATOMOR8: return X86::OR8rr; - case X86::ATOMOR16: return X86::OR16rr; - case X86::ATOMOR32: return X86::OR32rr; - case X86::ATOMOR64: return X86::OR64rr; - case X86::ATOMXOR8: return X86::XOR8rr; - case X86::ATOMXOR16: return X86::XOR16rr; - case X86::ATOMXOR32: return X86::XOR32rr; - case X86::ATOMXOR64: return X86::XOR64rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction with -// extra opcode. -static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc, - unsigned &ExtraOpc) { - switch (Opc) { - case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr; - case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr; - case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr; - case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr; - case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr; - case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr; - case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr; - case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr; - case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr; - case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr; - case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr; - case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr; - case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr; - case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr; - case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr; - case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr; - case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr; - case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr; - case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr; - case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction for -// 64-bit data type on 32-bit target. -static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { - switch (Opc) { - case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr; - case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr; - case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr; - case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; - case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; - case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; - case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; - case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; - case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; - case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction for -// 64-bit data type on 32-bit target with extra opcode. 
-static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc, - unsigned &HiOpc, - unsigned &ExtraOpc) { - switch (Opc) { - case X86::ATOMNAND6432: - ExtraOpc = X86::NOT32r; - HiOpc = X86::AND32rr; - return X86::AND32rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get pseudo CMOV opcode from the specified data type. -static unsigned getPseudoCMOVOpc(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::CMOV_GR8; - case MVT::i16: return X86::CMOV_GR16; - case MVT::i32: return X86::CMOV_GR32; - default: - break; - } - llvm_unreachable("Unknown CMOV opcode!"); -} - -// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions. -// They will be translated into a spin-loop or compare-exchange loop from -// -// ... -// dst = atomic-fetch-op MI.addr, MI.val -// ... -// -// to -// -// ... -// t1 = LOAD MI.addr -// loop: -// t4 = phi(t1, t3 / loop) -// t2 = OP MI.val, t4 -// EAX = t4 -// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined] -// t3 = EAX -// JNE loop -// sink: -// dst = t3 -// ... -MachineBasicBlock * -X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, - MachineBasicBlock *MBB) const { - MachineFunction *MF = MBB->getParent(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = MF->getRegInfo(); - - const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; - - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && - "Unexpected number of operands"); - - assert(MI->hasOneMemOperand() && - "Expected atomic-load-op to have one memoperand"); - - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - - unsigned DstReg, SrcReg; - unsigned MemOpndSlot; - - unsigned CurOp = 0; - - DstReg = MI->getOperand(CurOp++).getReg(); - MemOpndSlot = CurOp; - CurOp += X86::AddrNumOperands; - SrcReg = MI->getOperand(CurOp++).getReg(); - - const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - MVT::SimpleValueType VT = *RC->vt_begin(); - unsigned t1 = MRI.createVirtualRegister(RC); - unsigned t2 = MRI.createVirtualRegister(RC); - unsigned t3 = MRI.createVirtualRegister(RC); - unsigned t4 = MRI.createVirtualRegister(RC); - unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT); - - unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); - unsigned LOADOpc = getLoadOpcode(VT); - - // For the atomic load-arith operator, we generate - // - // thisMBB: - // t1 = LOAD [MI.addr] - // mainMBB: - // t4 = phi(t1 / thisMBB, t3 / mainMBB) - // t1 = OP MI.val, EAX - // EAX = t4 - // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] - // t3 = EAX - // JNE mainMBB - // sinkMBB: - // dst = t3 - - MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(I, mainMBB); - MF->insert(I, sinkMBB); - - MachineInstrBuilder MIB; - - // Transfer the remainder of BB and its successor edges to sinkMBB. 
- sinkMBB->splice(sinkMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); - - // thisMBB: - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { - unsigned flags = (*MMOI)->getFlags(); - flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; - MachineMemOperand *MMO = - MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, - (*MMOI)->getSize(), - (*MMOI)->getBaseAlignment(), - (*MMOI)->getTBAAInfo(), - (*MMOI)->getRanges()); - MIB.addMemOperand(MMO); - } - - thisMBB->addSuccessor(mainMBB); - - // mainMBB: - MachineBasicBlock *origMainMBB = mainMBB; - - // Add a PHI. - MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic-load-op opcode!"); - case X86::ATOMAND8: - case X86::ATOMAND16: - case X86::ATOMAND32: - case X86::ATOMAND64: - case X86::ATOMOR8: - case X86::ATOMOR16: - case X86::ATOMOR32: - case X86::ATOMOR64: - case X86::ATOMXOR8: - case X86::ATOMXOR16: - case X86::ATOMXOR32: - case X86::ATOMXOR64: { - unsigned ARITHOpc = getNonAtomicOpcode(Opc); - BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg) - .addReg(t4); - break; - } - case X86::ATOMNAND8: - case X86::ATOMNAND16: - case X86::ATOMNAND32: - case X86::ATOMNAND64: { - unsigned Tmp = MRI.createVirtualRegister(RC); - unsigned NOTOpc; - unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); - BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg) - .addReg(t4); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp); - break; - } - case X86::ATOMMAX8: - case X86::ATOMMAX16: - case X86::ATOMMAX32: - case X86::ATOMMAX64: - case X86::ATOMMIN8: - case X86::ATOMMIN16: - case X86::ATOMMIN32: - case X86::ATOMMIN64: - case X86::ATOMUMAX8: - case X86::ATOMUMAX16: - case X86::ATOMUMAX32: - case X86::ATOMUMAX64: - case X86::ATOMUMIN8: - case X86::ATOMUMIN16: - case X86::ATOMUMIN32: - case X86::ATOMUMIN64: { - unsigned CMPOpc; - unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc); - - BuildMI(mainMBB, DL, TII->get(CMPOpc)) - .addReg(SrcReg) - .addReg(t4); - - if (Subtarget->hasCMov()) { - if (VT != MVT::i8) { - // Native support - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) - .addReg(SrcReg) - .addReg(t4); - } else { - // Promote i8 to i32 to use CMOV32 - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const TargetRegisterClass *RC32 = - TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit); - unsigned SrcReg32 = MRI.createVirtualRegister(RC32); - unsigned AccReg32 = MRI.createVirtualRegister(RC32); - unsigned Tmp = MRI.createVirtualRegister(RC32); - - unsigned Undef = MRI.createVirtualRegister(RC32); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef); - - BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32) - .addReg(Undef) - .addReg(SrcReg) - .addImm(X86::sub_8bit); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32) - .addReg(Undef) - .addReg(t4) - .addImm(X86::sub_8bit); - - BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp) - .addReg(SrcReg32) - .addReg(AccReg32); - - BuildMI(mainMBB, DL, 
TII->get(TargetOpcode::COPY), t2) - .addReg(Tmp, 0, X86::sub_8bit); - } - } else { - // Use pseudo select and lower them. - assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && - "Invalid atomic-load-op transformation!"); - unsigned SelOpc = getPseudoCMOVOpc(VT); - X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); - assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); - MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2) - .addReg(SrcReg).addReg(t4) - .addImm(CC); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // Replace the original PHI node as mainMBB is changed after CMOV - // lowering. - BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - Phi->eraseFromParent(); - } - break; - } - } - - // Copy PhyReg back from virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg) - .addReg(t4); - - MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - MIB.addReg(t2); - MIB.setMemRefs(MMOBegin, MMOEnd); - - // Copy PhyReg back to virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3) - .addReg(PhyReg); - - BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - - mainMBB->addSuccessor(origMainMBB); - mainMBB->addSuccessor(sinkMBB); - - // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstReg) - .addReg(t3); - - MI->eraseFromParent(); - return sinkMBB; -} - -// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic -// instructions. They will be translated into a spin-loop or compare-exchange -// loop from -// -// ... -// dst = atomic-fetch-op MI.addr, MI.val -// ... -// -// to -// -// ... -// t1L = LOAD [MI.addr + 0] -// t1H = LOAD [MI.addr + 4] -// loop: -// t4L = phi(t1L, t3L / loop) -// t4H = phi(t1H, t3H / loop) -// t2L = OP MI.val.lo, t4L -// t2H = OP MI.val.hi, t4H -// EAX = t4L -// EDX = t4H -// EBX = t2L -// ECX = t2H -// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] -// t3L = EAX -// t3H = EDX -// JNE loop -// sink: -// dstL = t3L -// dstH = t3H -// ... 
-MachineBasicBlock * -X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, - MachineBasicBlock *MBB) const { - MachineFunction *MF = MBB->getParent(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = MF->getRegInfo(); - - const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; - - assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && - "Unexpected number of operands"); - - assert(MI->hasOneMemOperand() && - "Expected atomic-load-op32 to have one memoperand"); - - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - - unsigned DstLoReg, DstHiReg; - unsigned SrcLoReg, SrcHiReg; - unsigned MemOpndSlot; - - unsigned CurOp = 0; - - DstLoReg = MI->getOperand(CurOp++).getReg(); - DstHiReg = MI->getOperand(CurOp++).getReg(); - MemOpndSlot = CurOp; - CurOp += X86::AddrNumOperands; - SrcLoReg = MI->getOperand(CurOp++).getReg(); - SrcHiReg = MI->getOperand(CurOp++).getReg(); - - const TargetRegisterClass *RC = &X86::GR32RegClass; - const TargetRegisterClass *RC8 = &X86::GR8RegClass; - - unsigned t1L = MRI.createVirtualRegister(RC); - unsigned t1H = MRI.createVirtualRegister(RC); - unsigned t2L = MRI.createVirtualRegister(RC); - unsigned t2H = MRI.createVirtualRegister(RC); - unsigned t3L = MRI.createVirtualRegister(RC); - unsigned t3H = MRI.createVirtualRegister(RC); - unsigned t4L = MRI.createVirtualRegister(RC); - unsigned t4H = MRI.createVirtualRegister(RC); - - unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; - unsigned LOADOpc = X86::MOV32rm; - - // For the atomic load-arith operator, we generate - // - // thisMBB: - // t1L = LOAD [MI.addr + 0] - // t1H = LOAD [MI.addr + 4] - // mainMBB: - // t4L = phi(t1L / thisMBB, t3L / mainMBB) - // t4H = phi(t1H / thisMBB, t3H / mainMBB) - // t2L = OP MI.val.lo, t4L - // t2H = OP MI.val.hi, t4H - // EBX = t2L - // ECX = t2H - // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] - // t3L = EAX - // t3H = EDX - // JNE loop - // sinkMBB: - // dstL = t3L - // dstH = t3H - - MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(I, mainMBB); - MF->insert(I, sinkMBB); - - MachineInstrBuilder MIB; - - // Transfer the remainder of BB and its successor edges to sinkMBB. 
- sinkMBB->splice(sinkMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); - - // thisMBB: - // Lo - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { - unsigned flags = (*MMOI)->getFlags(); - flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; - MachineMemOperand *MMO = - MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, - (*MMOI)->getSize(), - (*MMOI)->getBaseAlignment(), - (*MMOI)->getTBAAInfo(), - (*MMOI)->getRanges()); - MIB.addMemOperand(MMO); - }; - MachineInstr *LowMI = MIB; - - // Hi - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) { - MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) - } else { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - } - MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end()); - - thisMBB->addSuccessor(mainMBB); - - // mainMBB: - MachineBasicBlock *origMainMBB = mainMBB; - - // Add PHIs. - MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); - - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic-load-op6432 opcode!"); - case X86::ATOMAND6432: - case X86::ATOMOR6432: - case X86::ATOMXOR6432: - case X86::ATOMADD6432: - case X86::ATOMSUB6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L) - .addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H) - .addReg(SrcHiReg); - break; - } - case X86::ATOMNAND6432: { - unsigned HiOpc, NOTOpc; - unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); - unsigned TmpL = MRI.createVirtualRegister(RC); - unsigned TmpH = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg) - .addReg(t4L); - BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg) - .addReg(t4H); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH); - break; - } - case X86::ATOMMAX6432: - case X86::ATOMMIN6432: - case X86::ATOMUMAX6432: - case X86::ATOMUMIN6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - unsigned cL = MRI.createVirtualRegister(RC8); - unsigned cH = MRI.createVirtualRegister(RC8); - unsigned cL32 = MRI.createVirtualRegister(RC); - unsigned cH32 = MRI.createVirtualRegister(RC); - unsigned cc = MRI.createVirtualRegister(RC); - // cl := cmp src_lo, lo - BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcLoReg).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(LoOpc), cL); - BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); - // ch := cmp src_hi, hi - BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcHiReg).addReg(t4H); - BuildMI(mainMBB, DL, TII->get(HiOpc), cH); - BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); - // cc := if 
(src_hi == hi) ? cl : ch; - if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) - .addReg(cH32).addReg(cL32); - } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) - .addReg(cH32).addReg(cL32) - .addImm(X86::COND_E); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - } - BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); - if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L) - .addReg(SrcLoReg).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H) - .addReg(SrcHiReg).addReg(t4H); - } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L) - .addReg(SrcLoReg).addReg(t4L) - .addImm(X86::COND_NE); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the - // 2nd CMOV lowering. - mainMBB->addLiveIn(X86::EFLAGS); - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) - .addReg(SrcHiReg).addReg(t4H) - .addImm(X86::COND_NE); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // Replace the original PHI node as mainMBB is changed after CMOV - // lowering. - BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); - PhiL->eraseFromParent(); - PhiH->eraseFromParent(); - } - break; - } - case X86::ATOMSWAP6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg); - break; - } - } - - // Copy EDX:EAX back from HiReg:LoReg - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H); - // Copy ECX:EBX from t1H:t1L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H); - - MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - MIB.setMemRefs(MMOBegin, MMOEnd); - - // Copy EDX:EAX back to t3H:t3L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX); - - BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - - mainMBB->addSuccessor(origMainMBB); - mainMBB->addSuccessor(sinkMBB); - - // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstLoReg) - .addReg(t3L); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstHiReg) - .addReg(t3H); - - MI->eraseFromParent(); - return sinkMBB; -} - // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. @@ -17442,62 +16668,6 @@ case X86::XBEGIN: return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo()); - // Atomic Lowering. 
- case X86::ATOMAND8: - case X86::ATOMAND16: - case X86::ATOMAND32: - case X86::ATOMAND64: - // Fall through - case X86::ATOMOR8: - case X86::ATOMOR16: - case X86::ATOMOR32: - case X86::ATOMOR64: - // Fall through - case X86::ATOMXOR16: - case X86::ATOMXOR8: - case X86::ATOMXOR32: - case X86::ATOMXOR64: - // Fall through - case X86::ATOMNAND8: - case X86::ATOMNAND16: - case X86::ATOMNAND32: - case X86::ATOMNAND64: - // Fall through - case X86::ATOMMAX8: - case X86::ATOMMAX16: - case X86::ATOMMAX32: - case X86::ATOMMAX64: - // Fall through - case X86::ATOMMIN8: - case X86::ATOMMIN16: - case X86::ATOMMIN32: - case X86::ATOMMIN64: - // Fall through - case X86::ATOMUMAX8: - case X86::ATOMUMAX16: - case X86::ATOMUMAX32: - case X86::ATOMUMAX64: - // Fall through - case X86::ATOMUMIN8: - case X86::ATOMUMIN16: - case X86::ATOMUMIN32: - case X86::ATOMUMIN64: - return EmitAtomicLoadArith(MI, BB); - - // This group does 64-bit operations on a 32-bit host. - case X86::ATOMAND6432: - case X86::ATOMOR6432: - case X86::ATOMXOR6432: - case X86::ATOMNAND6432: - case X86::ATOMADD6432: - case X86::ATOMSUB6432: - case X86::ATOMMAX6432: - case X86::ATOMMIN6432: - case X86::ATOMUMAX6432: - case X86::ATOMUMIN6432: - case X86::ATOMSWAP6432: - return EmitAtomicLoadArith6432(MI, BB); - case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -502,83 +502,6 @@ //===----------------------------------------------------------------------===// -// Atomic Instruction Pseudo Instructions -//===----------------------------------------------------------------------===// - -// Pseudo atomic instructions - -multiclass PSEUDO_ATOMIC_LOAD_BINOP { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { - let Defs = [EFLAGS, AL] in - def NAME#8 : I<0, Pseudo, (outs GR8:$dst), - (ins i8mem:$ptr, GR8:$val), - !strconcat(mnemonic, "8 PSEUDO!"), []>; - let Defs = [EFLAGS, AX] in - def NAME#16 : I<0, Pseudo,(outs GR16:$dst), - (ins i16mem:$ptr, GR16:$val), - !strconcat(mnemonic, "16 PSEUDO!"), []>; - let Defs = [EFLAGS, EAX] in - def NAME#32 : I<0, Pseudo, (outs GR32:$dst), - (ins i32mem:$ptr, GR32:$val), - !strconcat(mnemonic, "32 PSEUDO!"), []>; - let Defs = [EFLAGS, RAX] in - def NAME#64 : I<0, Pseudo, (outs GR64:$dst), - (ins i64mem:$ptr, GR64:$val), - !strconcat(mnemonic, "64 PSEUDO!"), []>; - } -} - -multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS { - def : Pat<(!cast(frag # "_8") addr:$ptr, GR8:$val), - (!cast(name # "8") addr:$ptr, GR8:$val)>; - def : Pat<(!cast(frag # "_16") addr:$ptr, GR16:$val), - (!cast(name # "16") addr:$ptr, GR16:$val)>; - def : Pat<(!cast(frag # "_32") addr:$ptr, GR32:$val), - (!cast(name # "32") addr:$ptr, GR32:$val)>; - def : Pat<(!cast(frag # "_64") addr:$ptr, GR64:$val), - (!cast(name # "64") addr:$ptr, GR64:$val)>; -} - -// Atomic exchange, and, or, xor -defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">; -defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">; -defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">; -defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">; -defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">; -defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">; -defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">; -defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">; - -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">; -defm : 
PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; - -multiclass PSEUDO_ATOMIC_LOAD_BINOP6432 { - let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], - mayLoad = 1, mayStore = 1, hasSideEffects = 0 in - def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - !strconcat(mnemonic, "6432 PSEUDO!"), []>; -} - -defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; -defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">; -defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">; -defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">; -defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">; -defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">; -defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">; -defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">; -defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">; -defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">; -defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; - -//===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -155,27 +155,6 @@ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -111,6 +111,7 @@ return *getX86TargetMachine().getSubtargetImpl(); } + void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; bool addPreRegAlloc() override; @@ -123,6 +124,12 @@ return new X86PassConfig(this, PM); } +void X86PassConfig::addIRPasses() { + addPass(createX86AtomicExpandPass(&getX86TargetMachine())); + + 
TargetPassConfig::addIRPasses(); +} + bool X86PassConfig::addInstSelector() { // Install an instruction selector. addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); Index: test/CodeGen/X86/2010-01-08-Atomic64Bug.ll =================================================================== --- test/CodeGen/X86/2010-01-08-Atomic64Bug.ll +++ test/CodeGen/X86/2010-01-08-Atomic64Bug.ll @@ -11,9 +11,9 @@ ; CHECK: movl 4([[REG]]), %edx ; CHECK: LBB0_1: ; CHECK: movl %eax, %ebx -; CHECK: addl {{%[a-z]+}}, %ebx +; CHECK: addl $1, %ebx ; CHECK: movl %edx, %ecx -; CHECK: adcl {{%[a-z]+}}, %ecx +; CHECK: adcl $0, %ecx ; CHECK: lock ; CHECK-NEXT: cmpxchg8b ([[REG]]) ; CHECK-NEXT: jne Index: test/CodeGen/X86/Atomics-64.ll =================================================================== --- test/CodeGen/X86/Atomics-64.ll +++ test/CodeGen/X86/Atomics-64.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 > %t.x86-64 -; RUN: llc < %s -march=x86 > %t.x86 +; RUN: llc < %s -march=x86 -mattr=cx16 > %t.x86 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" Index: test/CodeGen/X86/atomic-load-store-wide.ll =================================================================== --- test/CodeGen/X86/atomic-load-store-wide.ll +++ test/CodeGen/X86/atomic-load-store-wide.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mcpu=corei7 -march=x86 -verify-machineinstrs | FileCheck %s ; 64-bit load/store on x86-32 ; FIXME: The generated code can be substantially improved. Index: test/CodeGen/X86/atomic-minmax-i6432.ll =================================================================== --- test/CodeGen/X86/atomic-minmax-i6432.ll +++ test/CodeGen/X86/atomic-minmax-i6432.ll @@ -1,6 +1,5 @@ -; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX -; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV -; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC +; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX +; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC @sc64 = external global i64 @@ -9,87 +8,39 @@ %1 = atomicrmw max i64* @sc64, i64 5 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setl -; LINUX: cmpl -; LINUX: setl +; LINUX: seta ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] -; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] -; NOCMOV: cmpl -; NOCMOV: setl -; NOCMOV: cmpl -; NOCMOV: setl -; NOCMOV: jne -; NOCMOV: jne -; NOCMOV: lock -; NOCMOV-NEXT: cmpxchg8b -; NOCMOV: jne [[LABEL]] %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setg -; LINUX: cmpl -; LINUX: setg +; LINUX: setb ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] -; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] -; NOCMOV: cmpl -; NOCMOV: setg -; NOCMOV: cmpl -; NOCMOV: setg -; NOCMOV: jne -; NOCMOV: jne -; NOCMOV: lock -; NOCMOV-NEXT: cmpxchg8b -; NOCMOV: jne [[LABEL]] %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; 
LINUX: setb -; LINUX: cmpl -; LINUX: setb +; LINUX: seta ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] -; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] -; NOCMOV: cmpl -; NOCMOV: setb -; NOCMOV: cmpl -; NOCMOV: setb -; NOCMOV: jne -; NOCMOV: jne -; NOCMOV: lock -; NOCMOV-NEXT: cmpxchg8b -; NOCMOV: jne [[LABEL]] %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: seta -; LINUX: cmpl -; LINUX: seta +; LINUX: setb ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] -; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] -; NOCMOV: cmpl -; NOCMOV: seta -; NOCMOV: cmpl -; NOCMOV: seta -; NOCMOV: jne -; NOCMOV: jne -; NOCMOV: lock -; NOCMOV-NEXT: cmpxchg8b -; NOCMOV: jne [[LABEL]] ret void } @@ -98,8 +49,8 @@ define void @tf_bug(i8* %ptr) nounwind { ; PIC-LABEL: tf_bug: -; PIC: movl _id-L1$pb( -; PIC: movl (_id-L1$pb)+4( +; PIC-DAG: movl _id-L1$pb( +; PIC-DAG: movl (_id-L1$pb)+4( %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst %tmp2 = add i64 %tmp1, 1 %tmp3 = bitcast i8* %ptr to i64* Index: test/CodeGen/X86/atomic128.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/atomic128.ll @@ -0,0 +1,315 @@ +; RUN: llc < %s -march=x86-64 -verify-machineinstrs -mattr=cx16 | FileCheck %s + +@var = global i128 0 + +define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: movq %rsi, %rax +; CHECK: movq %rcx, %rbx +; CHECK: movq %r8, %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) + + %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %val = extractvalue { i128, i1 } %pair, 0 + ret i128 %val +} + +define void @fetch_and_nand(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_nand: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: movq %rdx, %rcx +; CHECK: andq [[INCHI]], %rcx +; CHECK: movq %rax, %rbx + ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. +; CHECK: andq %rsi, %rbx +; CHECK: notq %rbx +; CHECK: notq %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + %val = atomicrmw nand i128* %p, i128 %bits release + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_or(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_or: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: movq %rax, %rbx + ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. +; CHECK: orq %rsi, %rbx +; CHECK: movq %rdx, %rcx +; CHECK: orq [[INCHI]], %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw or i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_add(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_add: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: movq %rax, %rbx + ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. 
+; CHECK: addq %rsi, %rbx +; CHECK: movq %rdx, %rcx +; CHECK: adcq [[INCHI]], %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw add i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_sub(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_sub: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: movq %rax, %rbx + ; INCLO equivalent comes in in %rsi, so it makes sense it stays there. +; CHECK: subq %rsi, %rbx +; CHECK: movq %rdx, %rcx +; CHECK: sbbq [[INCHI]], %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw sub i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_min(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_min: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: cmpq %rsi, %rax +; CHECK: setbe [[CMP:%[a-z0-9]+]] +; CHECK: cmpq [[INCHI]], %rdx +; CHECK: setle [[HICMP:%[a-z0-9]+]] +; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] + +; CHECK: movb [[HICMP]], [[CMP]] +; CHECK: [[USE_LO]]: +; CHECK: testb [[CMP]], [[CMP]] +; CHECK: movq %rsi, %rbx +; CHECK: cmovneq %rax, %rbx +; CHECK: movq [[INCHI]], %rcx +; CHECK: cmovneq %rdx, %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw min i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_max(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_max: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: cmpq %rsi, %rax +; CHECK: setae [[CMP:%[a-z0-9]+]] +; CHECK: cmpq [[INCHI]], %rdx +; CHECK: setge [[HICMP:%[a-z0-9]+]] +; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] + +; CHECK: movb [[HICMP]], [[CMP]] +; CHECK: [[USE_LO]]: +; CHECK: testb [[CMP]], [[CMP]] +; CHECK: movq %rsi, %rbx +; CHECK: cmovneq %rax, %rbx +; CHECK: movq [[INCHI]], %rcx +; CHECK: cmovneq %rdx, %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw max i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_umin(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_umin: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: cmpq %rsi, %rax +; CHECK: setbe [[CMP:%[a-z0-9]+]] +; CHECK: cmpq [[INCHI]], %rdx +; CHECK: setbe [[HICMP:%[a-z0-9]+]] +; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] + +; CHECK: movb [[HICMP]], [[CMP]] +; CHECK: [[USE_LO]]: +; CHECK: testb [[CMP]], [[CMP]] +; CHECK: movq %rsi, %rbx +; CHECK: cmovneq %rax, %rbx +; CHECK: movq [[INCHI]], %rcx +; CHECK: cmovneq %rdx, %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw umin i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_umax(i128* %p, i128 %bits) { 
+; CHECK-LABEL: fetch_and_umax: +; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]] +; CHECK-DAG: movq (%rdi), %rax +; CHECK-DAG: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: cmpq %rax, %rsi +; CHECK: setb [[CMP:%[a-z0-9]+]] +; CHECK: cmpq [[INCHI]], %rdx +; CHECK: seta [[HICMP:%[a-z0-9]+]] +; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] + +; CHECK: movb [[HICMP]], [[CMP]] +; CHECK: [[USE_LO]]: +; CHECK: testb [[CMP]], [[CMP]] +; CHECK: movq %rsi, %rbx +; CHECK: cmovneq %rax, %rbx +; CHECK: movq [[INCHI]], %rcx +; CHECK: cmovneq %rdx, %rcx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + +; CHECK: movq %rax, _var +; CHECK: movq %rdx, _var+8 + + %val = atomicrmw umax i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define i128 @atomic_load_seq_cst(i128* %p) { +; CHECK-LABEL: atomic_load_seq_cst: +; CHECK: xorl %eax, %eax +; CHECK: xorl %edx, %edx +; CHECK: xorl %ebx, %ebx +; CHECK: xorl %ecx, %ecx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) + + %r = load atomic i128* %p seq_cst, align 16 + ret i128 %r +} + +define i128 @atomic_load_relaxed(i128* %p) { +; CHECK: atomic_load_relaxed: +; CHECK: xorl %eax, %eax +; CHECK: xorl %edx, %edx +; CHECK: xorl %ebx, %ebx +; CHECK: xorl %ecx, %ecx +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) + + %r = load atomic i128* %p monotonic, align 16 + ret i128 %r +} + +define void @atomic_store_seq_cst(i128* %p, i128 %in) { +; CHECK-LABEL: atomic_store_seq_cst: +; CHECK: movq %rdx, %rcx +; CHECK: movq %rsi, %rbx +; CHECK: movq (%rdi), %rax +; CHECK: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + + store atomic i128 %in, i128* %p seq_cst, align 16 + ret void +} + +define void @atomic_store_release(i128* %p, i128 %in) { +; CHECK-LABEL: atomic_store_release: +; CHECK: movq %rdx, %rcx +; CHECK: movq %rsi, %rbx +; CHECK: movq (%rdi), %rax +; CHECK: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + + store atomic i128 %in, i128* %p release, align 16 + ret void +} + +define void @atomic_store_relaxed(i128* %p, i128 %in) { +; CHECK-LABEL: atomic_store_relaxed: +; CHECK: movq %rdx, %rcx +; CHECK: movq %rsi, %rbx +; CHECK: movq (%rdi), %rax +; CHECK: movq 8(%rdi), %rdx + +; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: +; CHECK: lock +; CHECK: cmpxchg16b (%rdi) +; CHECK: jne [[LOOP]] + + store atomic i128 %in, i128* %p unordered, align 16 + ret void +} Index: test/CodeGen/X86/atomic16.ll =================================================================== --- test/CodeGen/X86/atomic16.ll +++ test/CodeGen/X86/atomic16.ll @@ -4,8 +4,8 @@ @sc16 = external global i16 define void @atomic_fetch_add16() nounwind { -; X64: atomic_fetch_add16 -; X32: atomic_fetch_add16 +; X64-LABEL: atomic_fetch_add16 +; X32-LABEL: atomic_fetch_add16 entry: ; 32-bit %t1 = atomicrmw add i16* @sc16, i16 1 acquire @@ -34,8 +34,8 @@ } define void @atomic_fetch_sub16() nounwind { -; X64: atomic_fetch_sub16 -; X32: atomic_fetch_sub16 +; X64-LABEL: atomic_fetch_sub16 +; X32-LABEL: atomic_fetch_sub16 %t1 = atomicrmw sub i16* @sc16, i16 1 acquire ; X64: lock ; X64: decw @@ -62,18 +62,18 @@ } define void @atomic_fetch_and16() nounwind { -; X64: atomic_fetch_and16 -; X32: atomic_fetch_and16 +; X64-LABEL: atomic_fetch_and16 +; X32-LABEL: atomic_fetch_and16 %t1 = atomicrmw and i16* @sc16, i16 3 acquire ; X64: lock ; X64: andw $3, {{.*}} # encoding: [0xf0,0x66 ; X32: lock ; X32: andw $3 %t2 = 
atomicrmw and i16* @sc16, i16 5 acquire -; X64: andw +; X64: andl ; X64: lock ; X64: cmpxchgw -; X32: andw +; X32: andl ; X32: lock ; X32: cmpxchgw %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire @@ -87,18 +87,18 @@ } define void @atomic_fetch_or16() nounwind { -; X64: atomic_fetch_or16 -; X32: atomic_fetch_or16 +; X64-LABEL: atomic_fetch_or16 +; X32-LABEL: atomic_fetch_or16 %t1 = atomicrmw or i16* @sc16, i16 3 acquire ; X64: lock ; X64: orw $3, {{.*}} # encoding: [0xf0,0x66 ; X32: lock ; X32: orw $3 %t2 = atomicrmw or i16* @sc16, i16 5 acquire -; X64: orw +; X64: orl ; X64: lock ; X64: cmpxchgw -; X32: orw +; X32: orl ; X32: lock ; X32: cmpxchgw %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire @@ -112,18 +112,18 @@ } define void @atomic_fetch_xor16() nounwind { -; X64: atomic_fetch_xor16 -; X32: atomic_fetch_xor16 +; X64-LABEL: atomic_fetch_xor16 +; X32-LABEL: atomic_fetch_xor16 %t1 = atomicrmw xor i16* @sc16, i16 3 acquire ; X64: lock ; X64: xorw $3, {{.*}} # encoding: [0xf0,0x66 ; X32: lock ; X32: xorw $3 %t2 = atomicrmw xor i16* @sc16, i16 5 acquire -; X64: xorw +; X64: xorl ; X64: lock ; X64: cmpxchgw -; X32: xorw +; X32: xorl ; X32: lock ; X32: cmpxchgw %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire @@ -137,15 +137,15 @@ } define void @atomic_fetch_nand16(i16 %x) nounwind { -; X64: atomic_fetch_nand16 -; X32: atomic_fetch_nand16 +; X64-LABEL: atomic_fetch_nand16 +; X32-LABEL: atomic_fetch_nand16 %t1 = atomicrmw nand i16* @sc16, i16 %x acquire -; X64: andw -; X64: notw +; X64: andl +; X64: notl ; X64: lock ; X64: cmpxchgw -; X32: andw -; X32: notw +; X32: andl +; X32: notl ; X32: lock ; X32: cmpxchgw ret void @@ -155,12 +155,16 @@ define void @atomic_fetch_max16(i16 %x) nounwind { %t1 = atomicrmw max i16* @sc16, i16 %x acquire -; X64: cmpw +; X64: movswl +; X64: movswl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgw -; X32: cmpw +; X32: movswl +; X32: movswl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgw @@ -171,12 +175,16 @@ define void @atomic_fetch_min16(i16 %x) nounwind { %t1 = atomicrmw min i16* @sc16, i16 %x acquire -; X64: cmpw +; X64: movswl +; X64: movswl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgw -; X32: cmpw +; X32: movswl +; X32: movswl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgw @@ -187,12 +195,16 @@ define void @atomic_fetch_umax16(i16 %x) nounwind { %t1 = atomicrmw umax i16* @sc16, i16 %x acquire -; X64: cmpw +; X64: movzwl +; X64: movzwl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgw -; X32: cmpw +; X32: movzwl +; X32: movzwl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgw @@ -203,11 +215,16 @@ define void @atomic_fetch_umin16(i16 %x) nounwind { %t1 = atomicrmw umin i16* @sc16, i16 %x acquire -; X64: cmpw +; X64: movzwl +; X64: movzwl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgw -; X32: cmpw + +; X32: movzwl +; X32: movzwl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgw Index: test/CodeGen/X86/atomic32.ll =================================================================== --- test/CodeGen/X86/atomic32.ll +++ test/CodeGen/X86/atomic32.ll @@ -5,8 +5,8 @@ @sc32 = external global i32 define void @atomic_fetch_add32() nounwind { -; X64: atomic_fetch_add32 -; X32: atomic_fetch_add32 +; X64-LABEL: atomic_fetch_add32: +; X32-LABEL: atomic_fetch_add32: entry: ; 32-bit %t1 = atomicrmw add i32* @sc32, i32 1 acquire @@ -35,8 +35,8 @@ } define void @atomic_fetch_sub32() nounwind { -; X64: atomic_fetch_sub32 -; X32: atomic_fetch_sub32 +; X64-LABEL: atomic_fetch_sub32: +; X32-LABEL: atomic_fetch_sub32: %t1 = atomicrmw sub i32* 
@sc32, i32 1 acquire ; X64: lock ; X64: decl @@ -63,8 +63,8 @@ } define void @atomic_fetch_and32() nounwind { -; X64: atomic_fetch_and32 -; X32: atomic_fetch_and32 +; X64-LABEL: atomic_fetch_and32: +; X32-LABEL: atomic_fetch_and32: %t1 = atomicrmw and i32* @sc32, i32 3 acquire ; X64: lock ; X64: andl $3 @@ -88,8 +88,8 @@ } define void @atomic_fetch_or32() nounwind { -; X64: atomic_fetch_or32 -; X32: atomic_fetch_or32 +; X64-LABEL: atomic_fetch_or32: +; X32-LABEL: atomic_fetch_or32: %t1 = atomicrmw or i32* @sc32, i32 3 acquire ; X64: lock ; X64: orl $3 @@ -113,8 +113,8 @@ } define void @atomic_fetch_xor32() nounwind { -; X64: atomic_fetch_xor32 -; X32: atomic_fetch_xor32 +; X64-LABEL: atomic_fetch_xor32: +; X32-LABEL: atomic_fetch_xor32: %t1 = atomicrmw xor i32* @sc32, i32 3 acquire ; X64: lock ; X64: xorl $3 @@ -138,8 +138,8 @@ } define void @atomic_fetch_nand32(i32 %x) nounwind { -; X64: atomic_fetch_nand32 -; X32: atomic_fetch_nand32 +; X64-LABEL: atomic_fetch_nand32: +; X32-LABEL: atomic_fetch_nand32: %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ; X64: andl ; X64: notl @@ -155,19 +155,22 @@ } define void @atomic_fetch_max32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_max32: +; X32-LABEL: atomic_fetch_max32: + %t1 = atomicrmw max i32* @sc32, i32 %x acquire -; X64: cmpl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgl -; X32: cmpl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgl -; NOCMOV: cmpl -; NOCMOV: jl +; NOCMOV: subl +; NOCMOV: jge ; NOCMOV: lock ; NOCMOV: cmpxchgl ret void @@ -177,19 +180,23 @@ } define void @atomic_fetch_min32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_min32: +; X32-LABEL: atomic_fetch_min32: +; NOCMOV-LABEL: atomic_fetch_min32: + %t1 = atomicrmw min i32* @sc32, i32 %x acquire -; X64: cmpl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgl -; X32: cmpl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgl -; NOCMOV: cmpl -; NOCMOV: jg +; NOCMOV: subl +; NOCMOV: jle ; NOCMOV: lock ; NOCMOV: cmpxchgl ret void @@ -199,19 +206,23 @@ } define void @atomic_fetch_umax32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_umax32: +; X32-LABEL: atomic_fetch_umax32: +; NOCMOV-LABEL: atomic_fetch_umax32: + %t1 = atomicrmw umax i32* @sc32, i32 %x acquire -; X64: cmpl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgl -; X32: cmpl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgl -; NOCMOV: cmpl -; NOCMOV: jb +; NOCMOV: subl +; NOCMOV: ja ; NOCMOV: lock ; NOCMOV: cmpxchgl ret void @@ -221,19 +232,23 @@ } define void @atomic_fetch_umin32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_umin32: +; X32-LABEL: atomic_fetch_umin32: +; NOCMOV-LABEL: atomic_fetch_umin32: + %t1 = atomicrmw umin i32* @sc32, i32 %x acquire -; X64: cmpl +; X64: subl ; X64: cmov ; X64: lock ; X64: cmpxchgl -; X32: cmpl +; X32: subl ; X32: cmov ; X32: lock ; X32: cmpxchgl -; NOCMOV: cmpl -; NOCMOV: ja +; NOCMOV: subl +; NOCMOV: jb ; NOCMOV: lock ; NOCMOV: cmpxchgl ret void @@ -243,6 +258,9 @@ } define void @atomic_fetch_cmpxchg32() nounwind { +; X64-LABEL: atomic_fetch_cmpxchg32: +; X32-LABEL: atomic_fetch_cmpxchg32: + %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire ; X64: lock ; X64: cmpxchgl @@ -254,6 +272,9 @@ } define void @atomic_fetch_store32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_store32: +; X32-LABEL: atomic_fetch_store32: + store atomic i32 %x, i32* @sc32 release, align 4 ; X64-NOT: lock ; X64: movl @@ -265,6 +286,9 @@ } define void @atomic_fetch_swap32(i32 %x) nounwind { +; X64-LABEL: atomic_fetch_swap32: +; X32-LABEL: atomic_fetch_swap32: + %t1 = atomicrmw xchg i32* 
@sc32, i32 %x acquire ; X64-NOT: lock ; X64: xchgl Index: test/CodeGen/X86/atomic64.ll =================================================================== --- test/CodeGen/X86/atomic64.ll +++ test/CodeGen/X86/atomic64.ll @@ -3,7 +3,8 @@ @sc64 = external global i64 define void @atomic_fetch_add64() nounwind { -; X64: atomic_fetch_add64 +; X64-LABEL: atomic_fetch_add64: +; X32-LABEL: atomic_fetch_add64: entry: %t1 = atomicrmw add i64* @sc64, i64 1 acquire ; X64: lock @@ -22,7 +23,8 @@ } define void @atomic_fetch_sub64() nounwind { -; X64: atomic_fetch_sub64 +; X64-LABEL: atomic_fetch_sub64: +; X32-LABEL: atomic_fetch_sub64: %t1 = atomicrmw sub i64* @sc64, i64 1 acquire ; X64: lock ; X64: decq @@ -40,7 +42,8 @@ } define void @atomic_fetch_and64() nounwind { -; X64: atomic_fetch_and64 +; X64-LABEL: atomic_fetch_and64: +; X32-LABEL: atomic_fetch_and64: %t1 = atomicrmw and i64* @sc64, i64 3 acquire ; X64: lock ; X64: andq $3 @@ -56,7 +59,8 @@ } define void @atomic_fetch_or64() nounwind { -; X64: atomic_fetch_or64 +; X64-LABEL: atomic_fetch_or64: +; X32-LABEL: atomic_fetch_or64: %t1 = atomicrmw or i64* @sc64, i64 3 acquire ; X64: lock ; X64: orq $3 @@ -72,7 +76,8 @@ } define void @atomic_fetch_xor64() nounwind { -; X64: atomic_fetch_xor64 +; X64-LABEL: atomic_fetch_xor64: +; X32-LABEL: atomic_fetch_xor64: %t1 = atomicrmw xor i64* @sc64, i64 3 acquire ; X64: lock ; X64: xorq $3 @@ -88,8 +93,8 @@ } define void @atomic_fetch_nand64(i64 %x) nounwind { -; X64: atomic_fetch_nand64 -; X32: atomic_fetch_nand64 +; X64-LABEL: atomic_fetch_nand64: +; X32-LABEL: atomic_fetch_nand64: %t1 = atomicrmw nand i64* @sc64, i64 %x acquire ; X64: andq ; X64: notq @@ -107,8 +112,10 @@ } define void @atomic_fetch_max64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_max64: +; X32-LABEL: atomic_fetch_max64: %t1 = atomicrmw max i64* @sc64, i64 %x acquire -; X64: cmpq +; X64: subq ; X64: cmov ; X64: lock ; X64: cmpxchgq @@ -126,8 +133,10 @@ } define void @atomic_fetch_min64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_min64: +; X32-LABEL: atomic_fetch_min64: %t1 = atomicrmw min i64* @sc64, i64 %x acquire -; X64: cmpq +; X64: subq ; X64: cmov ; X64: lock ; X64: cmpxchgq @@ -145,8 +154,10 @@ } define void @atomic_fetch_umax64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_umax64: +; X32-LABEL: atomic_fetch_umax64: %t1 = atomicrmw umax i64* @sc64, i64 %x acquire -; X64: cmpq +; X64: subq ; X64: cmov ; X64: lock ; X64: cmpxchgq @@ -164,8 +175,10 @@ } define void @atomic_fetch_umin64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_umin64: +; X32-LABEL: atomic_fetch_umin64: %t1 = atomicrmw umin i64* @sc64, i64 %x acquire -; X64: cmpq +; X64: subq ; X64: cmov ; X64: lock ; X64: cmpxchgq @@ -183,6 +196,8 @@ } define void @atomic_fetch_cmpxchg64() nounwind { +; X64-LABEL: atomic_fetch_cmpxchg64: +; X32-LABEL: atomic_fetch_cmpxchg64: %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire ; X64: lock ; X64: cmpxchgq @@ -194,6 +209,8 @@ } define void @atomic_fetch_store64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_store64: +; X32-LABEL: atomic_fetch_store64: store atomic i64 %x, i64* @sc64 release, align 8 ; X64-NOT: lock ; X64: movq @@ -205,6 +222,8 @@ } define void @atomic_fetch_swap64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_swap64: +; X32-LABEL: atomic_fetch_swap64: %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire ; X64-NOT: lock ; X64: xchgq Index: test/CodeGen/X86/atomic6432.ll =================================================================== --- test/CodeGen/X86/atomic6432.ll +++ test/CodeGen/X86/atomic6432.ll @@ -3,7 +3,8 
@@ @sc64 = external global i64 define void @atomic_fetch_add64() nounwind { -; X32: atomic_fetch_add64 +; X64-LABEL: atomic_fetch_add64: +; X32-LABEL: atomic_fetch_add64: entry: %t1 = atomicrmw add i64* @sc64, i64 1 acquire ; X32: addl @@ -30,20 +31,21 @@ } define void @atomic_fetch_sub64() nounwind { -; X32: atomic_fetch_sub64 +; X64-LABEL: atomic_fetch_sub64: +; X32-LABEL: atomic_fetch_sub64: %t1 = atomicrmw sub i64* @sc64, i64 1 acquire -; X32: subl -; X32: sbbl +; X32: addl $-1 +; X32: adcl $-1 ; X32: lock ; X32: cmpxchg8b %t2 = atomicrmw sub i64* @sc64, i64 3 acquire -; X32: subl -; X32: sbbl +; X32: addl $-3 +; X32: adcl $-1 ; X32: lock ; X32: cmpxchg8b %t3 = atomicrmw sub i64* @sc64, i64 5 acquire -; X32: subl -; X32: sbbl +; X32: addl $-5 +; X32: adcl $-1 ; X32: lock ; X32: cmpxchg8b %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire @@ -56,15 +58,16 @@ } define void @atomic_fetch_and64() nounwind { -; X32: atomic_fetch_and64 +; X64-LABEL: atomic_fetch_and64: +; X32-LABEL: atomic_fetch_and64: %t1 = atomicrmw and i64* @sc64, i64 3 acquire -; X32: andl -; X32: andl +; X32: andl $3 +; X32-NOT: andl ; X32: lock ; X32: cmpxchg8b - %t2 = atomicrmw and i64* @sc64, i64 5 acquire -; X32: andl -; X32: andl + %t2 = atomicrmw and i64* @sc64, i64 4294967297 acquire +; X32: andl $1 +; X32: andl $1 ; X32: lock ; X32: cmpxchg8b %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire @@ -77,15 +80,16 @@ } define void @atomic_fetch_or64() nounwind { -; X32: atomic_fetch_or64 +; X64-LABEL: atomic_fetch_or64: +; X32-LABEL: atomic_fetch_or64: %t1 = atomicrmw or i64* @sc64, i64 3 acquire -; X32: orl -; X32: orl +; X32: orl $3 +; X32-NOT: orl ; X32: lock ; X32: cmpxchg8b - %t2 = atomicrmw or i64* @sc64, i64 5 acquire -; X32: orl -; X32: orl + %t2 = atomicrmw or i64* @sc64, i64 4294967297 acquire +; X32: orl $1 +; X32: orl $1 ; X32: lock ; X32: cmpxchg8b %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire @@ -98,15 +102,16 @@ } define void @atomic_fetch_xor64() nounwind { -; X32: atomic_fetch_xor64 +; X64-LABEL: atomic_fetch_xor64: +; X32-LABEL: atomic_fetch_xor64: %t1 = atomicrmw xor i64* @sc64, i64 3 acquire ; X32: xorl -; X32: xorl +; X32-NOT: xorl ; X32: lock ; X32: cmpxchg8b - %t2 = atomicrmw xor i64* @sc64, i64 5 acquire -; X32: xorl -; X32: xorl + %t2 = atomicrmw xor i64* @sc64, i64 4294967297 acquire +; X32: xorl $1 +; X32: xorl $1 ; X32: lock ; X32: cmpxchg8b %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire @@ -119,7 +124,8 @@ } define void @atomic_fetch_nand64(i64 %x) nounwind { -; X32: atomic_fetch_nand64 +; X64-LABEL: atomic_fetch_nand64: +; X32-LABEL: atomic_fetch_nand64: %t1 = atomicrmw nand i64* @sc64, i64 %x acquire ; X32: andl ; X32: andl @@ -132,10 +138,11 @@ } define void @atomic_fetch_max64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_max64: +; X32-LABEL: atomic_fetch_max64: %t1 = atomicrmw max i64* @sc64, i64 %x acquire -; X32: cmpl -; X32: cmpl -; X32: cmov +; X32: subl +; X32: subl ; X32: cmov ; X32: cmov ; X32: lock @@ -145,10 +152,11 @@ } define void @atomic_fetch_min64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_min64: +; X32-LABEL: atomic_fetch_min64: %t1 = atomicrmw min i64* @sc64, i64 %x acquire -; X32: cmpl -; X32: cmpl -; X32: cmov +; X32: subl +; X32: subl ; X32: cmov ; X32: cmov ; X32: lock @@ -158,10 +166,11 @@ } define void @atomic_fetch_umax64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_umax64: +; X32-LABEL: atomic_fetch_umax64: %t1 = atomicrmw umax i64* @sc64, i64 %x acquire -; X32: cmpl -; X32: cmpl -; X32: cmov +; X32: subl +; X32: subl ; X32: cmov ; X32: cmov ; X32: lock @@
-171,10 +180,11 @@ } define void @atomic_fetch_umin64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_umin64: +; X32-LABEL: atomic_fetch_umin64: %t1 = atomicrmw umin i64* @sc64, i64 %x acquire -; X32: cmpl -; X32: cmpl -; X32: cmov +; X32: subl +; X32: subl ; X32: cmov ; X32: cmov ; X32: lock @@ -184,6 +194,8 @@ } define void @atomic_fetch_cmpxchg64() nounwind { +; X64-LABEL: atomic_fetch_cmpxchg64: +; X32-LABEL: atomic_fetch_cmpxchg64: %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire ; X32: lock ; X32: cmpxchg8b @@ -192,6 +204,8 @@ } define void @atomic_fetch_store64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_store64: +; X32-LABEL: atomic_fetch_store64: store atomic i64 %x, i64* @sc64 release, align 8 ; X32: lock ; X32: cmpxchg8b @@ -200,6 +214,8 @@ } define void @atomic_fetch_swap64(i64 %x) nounwind { +; X64-LABEL: atomic_fetch_swap64: +; X32-LABEL: atomic_fetch_swap64: %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire ; X32: lock ; X32: xchg8b Index: test/CodeGen/X86/atomic8.ll =================================================================== --- test/CodeGen/X86/atomic8.ll +++ test/CodeGen/X86/atomic8.ll @@ -4,8 +4,8 @@ @sc8 = external global i8 define void @atomic_fetch_add8() nounwind { -; X64: atomic_fetch_add8 -; X32: atomic_fetch_add8 +; X64-LABEL: atomic_fetch_add8: +; X32-LABEL: atomic_fetch_add8: entry: ; 32-bit %t1 = atomicrmw add i8* @sc8, i8 1 acquire @@ -34,8 +34,8 @@ } define void @atomic_fetch_sub8() nounwind { -; X64: atomic_fetch_sub8 -; X32: atomic_fetch_sub8 +; X64-LABEL: atomic_fetch_sub8: +; X32-LABEL: atomic_fetch_sub8: %t1 = atomicrmw sub i8* @sc8, i8 1 acquire ; X64: lock ; X64: decb @@ -62,8 +62,8 @@ } define void @atomic_fetch_and8() nounwind { -; X64: atomic_fetch_and8 -; X32: atomic_fetch_and8 +; X64-LABEL: atomic_fetch_and8: +; X32-LABEL: atomic_fetch_and8: %t1 = atomicrmw and i8* @sc8, i8 3 acquire ; X64: lock ; X64: andb $3 @@ -87,8 +87,8 @@ } define void @atomic_fetch_or8() nounwind { -; X64: atomic_fetch_or8 -; X32: atomic_fetch_or8 +; X64-LABEL: atomic_fetch_or8: +; X32-LABEL: atomic_fetch_or8: %t1 = atomicrmw or i8* @sc8, i8 3 acquire ; X64: lock ; X64: orb $3 @@ -112,8 +112,8 @@ } define void @atomic_fetch_xor8() nounwind { -; X64: atomic_fetch_xor8 -; X32: atomic_fetch_xor8 +; X64-LABEL: atomic_fetch_xor8: +; X32-LABEL: atomic_fetch_xor8: %t1 = atomicrmw xor i8* @sc8, i8 3 acquire ; X64: lock ; X64: xorb $3 @@ -137,8 +137,8 @@ } define void @atomic_fetch_nand8(i8 %x) nounwind { -; X64: atomic_fetch_nand8 -; X32: atomic_fetch_nand8 +; X64-LABEL: atomic_fetch_nand8: +; X32-LABEL: atomic_fetch_nand8: %t1 = atomicrmw nand i8* @sc8, i8 %x acquire ; X64: andb ; X64: notb @@ -154,14 +154,18 @@ } define void @atomic_fetch_max8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_max8: +; X32-LABEL: atomic_fetch_max8: %t1 = atomicrmw max i8* @sc8, i8 %x acquire -; X64: cmpb -; X64: cmov +; X64: movsbl +; X64: movsbl +; X64: subl ; X64: lock ; X64: cmpxchgb -; X32: cmpb -; X32: cmov +; X32: movsbl +; X32: movsbl +; X32: subl ; X32: lock ; X32: cmpxchgb ret void @@ -170,14 +174,18 @@ } define void @atomic_fetch_min8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_min8: +; X32-LABEL: atomic_fetch_min8: %t1 = atomicrmw min i8* @sc8, i8 %x acquire -; X64: cmpb -; X64: cmov +; X64: movsbl +; X64: movsbl +; X64: subl ; X64: lock ; X64: cmpxchgb -; X32: cmpb -; X32: cmov +; X32: movsbl +; X32: movsbl +; X32: subl ; X32: lock ; X32: cmpxchgb ret void @@ -186,14 +194,18 @@ } define void @atomic_fetch_umax8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_umax8: +; X32-LABEL:
atomic_fetch_umax8: %t1 = atomicrmw umax i8* @sc8, i8 %x acquire -; X64: cmpb -; X64: cmov +; X64: movzbl +; X64: movzbl +; X64: subl ; X64: lock ; X64: cmpxchgb -; X32: cmpb -; X32: cmov +; X32: movzbl +; X32: movzbl +; X32: subl ; X32: lock ; X32: cmpxchgb ret void @@ -202,13 +214,18 @@ } define void @atomic_fetch_umin8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_umin8: +; X32-LABEL: atomic_fetch_umin8: %t1 = atomicrmw umin i8* @sc8, i8 %x acquire -; X64: cmpb -; X64: cmov +; X64: movzbl +; X64: movzbl +; X64: subl ; X64: lock ; X64: cmpxchgb -; X32: cmpb -; X32: cmov + +; X32: movzbl +; X32: movzbl +; X32: subl ; X32: lock ; X32: cmpxchgb ret void @@ -217,6 +234,8 @@ } define void @atomic_fetch_cmpxchg8() nounwind { +; X64-LABEL: atomic_fetch_cmpxchg8: +; X32-LABEL: atomic_fetch_cmpxchg8: %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire ; X64: lock ; X64: cmpxchgb @@ -228,6 +247,8 @@ } define void @atomic_fetch_store8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_store8: +; X32-LABEL: atomic_fetch_store8: store atomic i8 %x, i8* @sc8 release, align 4 ; X64-NOT: lock ; X64: movb @@ -239,6 +260,8 @@ } define void @atomic_fetch_swap8(i8 %x) nounwind { +; X64-LABEL: atomic_fetch_swap8: +; X32-LABEL: atomic_fetch_swap8: %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire ; X64-NOT: lock ; X64: xchgb Index: test/CodeGen/X86/atomic_op.ll =================================================================== --- test/CodeGen/X86/atomic_op.ll +++ test/CodeGen/X86/atomic_op.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov,cx16 -verify-machineinstrs | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -110,19 +110,19 @@ %17 = extractvalue { i32, i1 } %pair17, 0 store i32 %17, i32* %old ; CHECK: movl [[R17atomic:.*]], %eax - ; CHECK: movl $1401, %[[R17mask:[a-z]*]] - ; CHECK: andl %eax, %[[R17mask]] - ; CHECK: notl %[[R17mask]] + ; CHECK: movl %eax, %[[R17mask:[a-z]*]] + ; CHECK: notl %[[R17mask]] + ; CHECK: orl $-1402, %[[R17mask]] ; CHECK: lock ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]] ; CHECK: jne ; CHECK: movl %eax, %18 = atomicrmw nand i32* %val2, i32 1401 monotonic store i32 %18, i32* %old - ; CHECK: andl - ; CHECK: andl ; CHECK: notl ; CHECK: notl + ; CHECK: orl $252645135 + ; CHECK: orl $252645135 ; CHECK: lock ; CHECK: cmpxchg8b %19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic Index: test/CodeGen/X86/pr5145.ll =================================================================== --- test/CodeGen/X86/pr5145.ll +++ test/CodeGen/X86/pr5145.ll @@ -5,29 +5,29 @@ ; CHECK: atomic_maxmin_i8 %1 = atomicrmw max i8* @sc8, i8 5 acquire ; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpb -; CHECK: cmovl +; CHECK: movsbl +; CHECK: cmpl ; CHECK: lock ; CHECK-NEXT: cmpxchgb ; CHECK: jne [[LABEL1]] %2 = atomicrmw min i8* @sc8, i8 6 acquire ; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpb -; CHECK: cmovg +; CHECK: movsbl +; CHECK: cmpl ; CHECK: lock ; CHECK-NEXT: cmpxchgb ; CHECK: jne [[LABEL3]] %3 = atomicrmw umax i8* @sc8, i8 7 acquire ; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpb -; CHECK: cmovb +; CHECK: movzbl +; CHECK: cmpl ; CHECK: lock ; CHECK-NEXT: cmpxchgb ; CHECK: jne [[LABEL5]] %4 = atomicrmw umin i8* @sc8, i8 8 acquire ; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpb -; CHECK: cmova +; CHECK: movzbl +; CHECK: cmpl ; CHECK: lock ; 
CHECK-NEXT: cmpxchgb ; CHECK: jne [[LABEL7]]
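For context on where the i128 atomicrmw operations checked above typically originate, the snippet below is an illustrative C++ sketch, not part of this patch: the names (counter, fetch_and_add) are made up, and whether the builtin lowers to the inline lock cmpxchg16b loop these tests expect, rather than a libatomic call, depends on the compiler and on cmpxchg16b being available (e.g. building with -mcx16).

// Hypothetical example, not taken from the patch: a 128-bit fetch-and-add
// written with the GCC/Clang __atomic builtins. On an x86-64 target with
// cmpxchg16b enabled this is the kind of source that can produce an
// "atomicrmw add i128", i.e. the IR whose expansion fetch_and_add checks.
#include <cstdio>

static unsigned __int128 counter;   // plays the role of @var in the tests

unsigned __int128 fetch_and_add(unsigned __int128 *p, unsigned __int128 bits) {
  // Returns the value the memory held before the addition, matching the
  // result of the atomicrmw instruction.
  return __atomic_fetch_add(p, bits, __ATOMIC_SEQ_CST);
}

int main() {
  unsigned __int128 old = fetch_and_add(&counter, 1);
  std::printf("low 64 bits before the add: %llu\n",
              static_cast<unsigned long long>(old));
  return 0;
}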