Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -165,6 +165,13 @@ /// \brief Initialize all of the actions to default values. void initActions(); + /// Allow lowering into __sync_* libcalls. Without calling this, the + /// __sync calls do not have names defined, and attempting to use + /// them from your backend will result in an error. (These must be + /// enabled explicitly only in order to avoid them being generated + /// accidentally on targets that don't support them.) + void initSyncLibcalls(); + public: const TargetMachine &getTargetMachine() const { return TM; } Index: include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- include/llvm/Target/TargetSubtargetInfo.h +++ include/llvm/Target/TargetSubtargetInfo.h @@ -144,9 +144,6 @@ /// which is the preferred way to influence this. virtual bool enablePostRAScheduler() const; - /// \brief True if the subtarget should run the atomic expansion pass. - virtual bool enableAtomicExpand() const; - /// \brief Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -173,7 +173,7 @@ } // end anonymous namespace bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) + if (!TM) return false; TLI = TM->getSubtargetImpl(F)->getTargetLowering(); Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2795,33 +2795,6 @@ Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_LOAD: { - // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. - SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); - SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); - SDValue Swap = DAG.getAtomicCmpSwap( - ISD::ATOMIC_CMP_SWAP, dl, cast(Node)->getMemoryVT(), VTs, - Node->getOperand(0), Node->getOperand(1), Zero, Zero, - cast(Node)->getMemOperand(), - cast(Node)->getOrdering(), - cast(Node)->getOrdering(), - cast(Node)->getSynchScope()); - Results.push_back(Swap.getValue(0)); - Results.push_back(Swap.getValue(1)); - break; - } - case ISD::ATOMIC_STORE: { - // There is no libcall for atomic store; fake it with ATOMIC_SWAP. - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - cast(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Node->getOperand(2), - cast(Node)->getMemOperand(), - cast(Node)->getOrdering(), - cast(Node)->getSynchScope()); - Results.push_back(Swap.getValue(1)); - break; - } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. 
Expanding the resulting Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1327,7 +1327,6 @@ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; - case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break; case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -2700,24 +2699,6 @@ } } -void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDLoc dl(N); - EVT VT = cast(N)->getMemoryVT(); - SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); - SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue Swap = DAG.getAtomicCmpSwap( - ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, - cast(N)->getMemoryVT(), VTs, N->getOperand(0), - N->getOperand(1), Zero, Zero, cast(N)->getMemOperand(), - cast(N)->getOrdering(), - cast(N)->getOrdering(), - cast(N)->getSynchScope()); - - ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); -} - //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -2762,8 +2743,6 @@ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; case ISD::RETURNADDR: case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; - - case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -3198,19 +3177,6 @@ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; } -SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { - SDLoc dl(N); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - cast(N)->getMemoryVT(), - N->getOperand(0), - N->getOperand(1), N->getOperand(2), - cast(N)->getMemOperand(), - cast(N)->getOrdering(), - cast(N)->getSynchScope()); - return Swap.getValue(1); -} - - SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -386,7 +386,6 @@ SDValue ExpandIntOp_TRUNCATE(SDNode *N); SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); SDValue ExpandIntOp_RETURNADDR(SDNode *N); - SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1435,6 +1435,10 @@ } SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { + if (!Sym) + report_fatal_error( + "Attempted to use null symbol in SelectionDAG::getExternalSymbol!"); + SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = newSDNode(false, Sym, 0, VT); @@ -1453,6 +1457,10 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags) { + if (!Sym) + report_fatal_error("Attempted to use null symbol in " + "SelectionDAG::getTargetExternalSymbol!"); + SDNode *&N = 
TargetExternalSymbols[std::pair(Sym, TargetFlags)]; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -353,66 +353,6 @@ Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16"; - Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; - Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; - Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; - Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; - Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; - Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; - Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; - Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; - Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; - Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; - Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; - Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; - Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; - Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; - Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; - Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; - Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; - Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; - Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; - Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; - Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; - Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; - Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; - Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; - Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; - Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; - Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; - Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; - Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16"; - Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; - Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; - Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; - Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; - Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = 
"__sync_fetch_and_umax_2"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; - Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16"; - Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; - Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; - Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; - Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; - Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; - Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; Names[RTLIB::ATOMIC_LOAD] = "__atomic_load"; Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1"; @@ -488,6 +428,85 @@ Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize"; } +void TargetLoweringBase::initSyncLibcalls() { + LibcallRoutineNames[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = + "__sync_val_compare_and_swap_1"; + LibcallRoutineNames[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = + "__sync_lock_test_and_set_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; + + LibcallRoutineNames[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = + "__sync_val_compare_and_swap_2"; + LibcallRoutineNames[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = + "__sync_lock_test_and_set_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; + + LibcallRoutineNames[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = + "__sync_val_compare_and_swap_4"; + LibcallRoutineNames[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = + "__sync_lock_test_and_set_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_OR_4] = 
"__sync_fetch_and_or_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; + + LibcallRoutineNames[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = + "__sync_val_compare_and_swap_8"; + LibcallRoutineNames[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = + "__sync_lock_test_and_set_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; + + LibcallRoutineNames[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = + "__sync_val_compare_and_swap_16"; + LibcallRoutineNames[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = + "__sync_lock_test_and_set_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_NAND_16] = + "__sync_fetch_and_nand_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMAX_16] = + "__sync_fetch_and_umax_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; + LibcallRoutineNames[RTLIB::SYNC_FETCH_AND_UMIN_16] = + "__sync_fetch_and_umin_16"; +} /// InitLibcallCallingConvs - Set default libcall CallingConvs. /// static void InitLibcallCallingConvs(CallingConv::ID *CCs) { @@ -826,9 +845,7 @@ GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; MinimumJumpTableEntries = 4; - // TODO: the default will be switched to 0 in the next commit, along - // with the Target-specific changes necessary. 
- MaxAtomicSizeInBitsSupported = 1024; + MaxAtomicSizeInBitsSupported = 0; MinCmpXchgSizeInBits = 0; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -639,6 +639,8 @@ } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); + + setMaxAtomicSizeInBitsSupported(128); } void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { @@ -10208,28 +10210,29 @@ return TargetLoweringBase::getPreferredVectorAction(VT); } -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. +// Loads and stores less than 128-bits are already atomic; 128-bit +// ones can only be done via ldaxp/stlxp sequences, so must be expanded. bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + assert(Size <= 128 && + "Sizes above 128 should've been handled by AtomicExpandPass"); return Size == 128; } -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. +// Loads and stores less than 128-bits are already atomic; 128-bit +// ones can only be done via ldaxp/stlxp sequences, so must be expanded. TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + assert(Size <= 128 && + "Sizes above 128 should've been handled by AtomicExpandPass"); return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } -// For the real atomic operations, we have ldxr/stxr up to 128 bits, +// Expand RMW operations to ldrex/strex instructions. TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; } bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -493,10 +493,6 @@ /// unsigned ARMPCLabelIndex; - // TODO: remove this, and have shouldInsertFencesForAtomic do the proper - // check. - bool InsertFencesForAtomic; - void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -842,48 +842,51 @@ else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. - InsertFencesForAtomic = false; - if (Subtarget->hasAnyDataBarrier() && - (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { - // ATOMIC_FENCE needs custom lowering; the others should have been expanded - // to ldrex/strex loops already. 
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - if (!Subtarget->isThumb() || !Subtarget->isMClass()) + // Processors that support ldrex have native lock-free atomics. + // + // And, OSes that have cmpxchg via kernel support can use atomics + // regardless (with expansion to __sync_* libcalls as needed). + // + if (Subtarget->hasLdrex() || Subtarget->isTargetDarwin() || + Subtarget->isTargetLinux()) { + // The Cortex-M only supports up to 32bit operations, while + // everything else supports 64-bit (via the ldrexd intrinsic + // expansion). + if (Subtarget->isMClass()) + setMaxAtomicSizeInBitsSupported(32); + else + setMaxAtomicSizeInBitsSupported(64); + + // When we're relying on OS cmpxchg support, set everything but + // ATOMIC_LOAD/ATOMIC_STORE for expansion, so we will emit + // __sync_* libcalls. (load and store themselves are atomic on all + // CPUs) + if (!Subtarget->hasLdrex()) { + initSyncLibcalls(); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + } else { + // This is part of the hack for -O0 mode: in other modes cmpxchg is + // translated into ldrex/strex, so no ATOMIC_CMP_SWAP is seen. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - - // On v8, we have particularly efficient implementations of atomic fences - // if they can be combined with nearby atomic loads and stores. - if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) { - // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. - InsertFencesForAtomic = true; } - } else { - // If there's anything we can use as a barrier, go through custom lowering - // for ATOMIC_FENCE. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, - Subtarget->hasAnyDataBarrier() ? Custom : Expand); - - // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the - // Unordered/Monotonic case. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); } + // If there's anything we can use as a barrier, go through custom lowering + // for ATOMIC_FENCE. 
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, + Subtarget->hasAnyDataBarrier() ? Custom : Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. @@ -6934,16 +6937,6 @@ Results.push_back(Upper); } -static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { - if (isStrongerThanMonotonic(cast(Op)->getOrdering())) - // Acquire/Release load/store is not legal for targets without a dmb or - // equivalent available. - return SDValue(); - - // Monotonic load/store is legal for all targets. - return Op; -} - static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, @@ -7082,8 +7075,6 @@ case ISD::SSUBO: case ISD::USUBO: return LowerXALUO(Op, DAG); - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); @@ -12117,8 +12108,6 @@ // First, if the target has no DMB, see what fallback we can use. if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. - // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get - // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), @@ -12126,9 +12115,10 @@ Builder.getInt32(10), Builder.getInt32(5)}; return Builder.CreateCall(MCR, args); } else { - // Instead of using barriers, atomic accesses on these subtargets use - // libcalls. - llvm_unreachable("makeDMB on a target so old that it has no barriers"); + // Instead of barriers, atomic accesses on Thumb1 and pre-v6 ARM + // mode just use a libcall to __sync_synchronize. So, just emit + // a fence instruction. + return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); } } else { Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); @@ -12183,41 +12173,66 @@ llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } -// Loads and stores less than 64-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit -// anything for those. +// In the following "should*Atomic*" routines, there's two cases to consider: +// 1) We have native atomics (hasLdrex() == true) +// +// 2) We don't actually have native atomics, but we have told AtomicExpandPass +// that we do, because we're on an OS that provides a "magic" lock-free +// compare-and-swap routine. In the latter case, we rely on __sync libcall +// expansions for all the operations. +// +// The other possibility is that we have neither native atomics, nor special OS +// routines allowing lock-free libcalls. However, then, expansion to __atomic_* +// calls will happen in AtomicExpandPass (due to MaxAtomicSizeInBitsSupported = +// 0), and the below routines will not be called. So, here, we're only concerned +// with the first two cases. +// +// If we are using libcalls, cmpxchg and rmw operations are desired. If we're +// using native instructions ll/sc expansions are needed. + +// Loads and stores less than 64-bits are intrinsically atomic. For 64-bit +// operations, we can replace with ldrexd/strexd. +// +// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. 
A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual, sections
+// A8.8.72-74 LDRD); on such CPUs it would be advantageous to not expand 64-bit
+// loads and stores to LL/SC sequences.
 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
-  return (Size == 64) && !Subtarget->isMClass();
+  assert(Size <= 64 &&
+         "Sizes above 64 should've been handled by AtomicExpandPass");
+  return Size == 64;
 }

-// Loads and stores less than 64-bits are already atomic; ones above that
-// are doomed anyway, so defer to the default libcall and blame the OS when
-// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
-// anything for those.
-// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
-// guarantee, see DDI0406C ARM architecture reference manual,
-// sections A8.8.72-74 LDRD)
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
-                                                  : AtomicExpansionKind::None;
+  assert(Size <= 64 &&
+         "Sizes above 64 should've been handled by AtomicExpandPass");
+  if (Size != 64)
+    return AtomicExpansionKind::None;
+
+  if (!Subtarget->hasLdrex())
+    // Will expand to a cmpxchg libcall.
+    return AtomicExpansionKind::CmpXChg;
+
+  return AtomicExpansionKind::LLOnly;
 }

-// For the real atomic operations, we have ldrex/strex up to 32 bits,
-// and up to 64 bits on the non-M profiles
+// For the more complex atomic operations, we use LL/SC instead of
+// cmpxchg.
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
-  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return (Size <= (Subtarget->isMClass() ? 32U : 64U))
-             ? AtomicExpansionKind::LLSC
-             : AtomicExpansionKind::None;
+  if (!Subtarget->hasLdrex())
+    return AtomicExpansionKind::None;
+  return AtomicExpansionKind::LLSC;
 }

 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
+  if (!Subtarget->hasLdrex())
+    return false;
+
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
@@ -12228,7 +12243,26 @@
 bool ARMTargetLowering::shouldInsertFencesForAtomic(
     const Instruction *I) const {
-  return InsertFencesForAtomic;
+  // On CPUs without ldrex, we emit __sync_* libcalls, which do not need
+  // external barriers because the routines already contain the appropriate
+  // barriers. However, loads and stores are still handled directly, and thus
+  // need barriers.
+  if (!Subtarget->hasLdrex()) {
+    return isa<LoadInst>(I) || isa<StoreInst>(I);
+  }
+
+  // In -O0 mode, there's a hack in place to expand ATOMIC_CMP_SWAP in a late
+  // pseudo expansion instead of in IR. This pseudo requires fences to be
+  // emitted externally.
+  if (getTargetMachine().getOptLevel() == 0 && isa<AtomicCmpXchgInst>(I))
+    return true;
+
+  // On v8, we have particularly efficient implementations of atomic fences
+  // if they can be combined with nearby atomic loads and stores.
+  if (Subtarget->hasV8Ops())
+    return false;
+
+  // Automatically insert fences (dmb ish) around all atomic operations.
+  return true;
 }

 // This has so far only been implemented for MachO.
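The ARM changes above illustrate the general recipe this patch expects from every target: declare the widest lock-free size with setMaxAtomicSizeInBitsSupported(), and call initSyncLibcalls() (plus Expand actions on the atomic DAG nodes) only when the platform genuinely provides lock-free __sync_* routines. The snippet below is a minimal illustrative sketch, not part of the patch; MyTargetLowering, MyTargetSubtarget, hasNativeLLSC() and osProvidesLockFreeCmpXchg() are invented placeholder names for a hypothetical out-of-tree backend.

// Illustrative only: how a hypothetical backend opts in under the new scheme,
// where MaxAtomicSizeInBitsSupported defaults to 0 and the __sync_* names are
// no longer registered by default.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MyTargetSubtarget &STI)
    : TargetLowering(TM) {
  if (STI.hasNativeLLSC()) {
    // Native ll/sc (or cmpxchg) instructions: AtomicExpandPass may expand
    // atomics in IR up to this width; anything wider becomes __atomic_*.
    setMaxAtomicSizeInBitsSupported(64);
  } else if (STI.osProvidesLockFreeCmpXchg()) {
    // No native atomics, but the OS guarantees lock-free __sync_* helpers:
    // register their names and expand the DAG nodes into those libcalls.
    setMaxAtomicSizeInBitsSupported(32);
    initSyncLibcalls();
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
  } else {
    // Nothing lock-free at all: leave the maximum at 0 so AtomicExpandPass
    // turns every atomic operation into an __atomic_* libcall.
    setMaxAtomicSizeInBitsSupported(0);
  }
}

Because the default is now 0, a target that sets nothing at all gets the conservative behaviour: every atomic operation is routed to the __atomic_* library.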
Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -492,8 +492,10 @@ /// True for some subtargets at > -O0. bool enablePostRAScheduler() const override; - // enableAtomicExpand- True if we need to expand our atomics. - bool enableAtomicExpand() const override; + // True for targets that support atomic ldrex/strex. + bool hasLdrex() const { + return HasV6Ops && (!InThumbMode || HasV8MBaselineOps); + } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. Index: lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- lib/Target/ARM/ARMSubtarget.cpp +++ lib/Target/ARM/ARMSubtarget.cpp @@ -317,10 +317,6 @@ return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpand() const { - return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps()); -} - bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind Index: lib/Target/BPF/BPFISelLowering.cpp =================================================================== --- lib/Target/BPF/BPFISelLowering.cpp +++ lib/Target/BPF/BPFISelLowering.cpp @@ -63,6 +63,8 @@ setStackPointerRegisterToSaveRestore(BPF::R11); + setMaxAtomicSizeInBitsSupported(64); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); Index: lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.h +++ lib/Target/Hexagon/HexagonISelLowering.h @@ -254,12 +254,13 @@ AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { return AtomicExpansionKind::LLSC; } + bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override { + return true; + } protected: std::pair Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1713,6 +1713,7 @@ setPrefLoopAlignment(4); setPrefFunctionAlignment(4); setMinFunctionAlignment(2); + setMaxAtomicSizeInBitsSupported(64); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); if (EnableHexSDNodeSched) @@ -3108,16 +3109,3 @@ Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); return Ext; } - -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64 - ? AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; -} - -bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - // Do not expand loads and stores that don't exceed 64 bits. 
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; -} Index: lib/Target/Mips/Mips16ISelLowering.cpp =================================================================== --- lib/Target/Mips/Mips16ISelLowering.cpp +++ lib/Target/Mips/Mips16ISelLowering.cpp @@ -128,6 +128,10 @@ if (!Subtarget.useSoftFloat()) setMips16HardFloatLibCalls(); + // Call __sync_* library calls for most atomic instructions; the + // MIPS16 ISA has no ll/sc or fence instructions, but it can call mips32 + // functions to do the work. + initSyncLibcalls(); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -387,11 +387,10 @@ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget.isGP64bit()) { - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - } - + if (Subtarget.isGP64bit()) + setMaxAtomicSizeInBitsSupported(64); + else + setMaxAtomicSizeInBitsSupported(32); if (!Subtarget.hasMips32r2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -828,11 +828,6 @@ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); - if (!isPPC64) { - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - } - setBooleanContents(ZeroOrOneBooleanContent); if (Subtarget.hasAltivec()) { @@ -923,6 +918,7 @@ break; } + setMaxAtomicSizeInBitsSupported(isPPC64 ? 64 : 32); if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -1644,8 +1644,6 @@ // Test made to fail pending completion of AtomicExpandPass, // as this will cause a regression until that work is completed. setMaxAtomicSizeInBitsSupported(32); - else - setMaxAtomicSizeInBitsSupported(0); setMinCmpXchgSizeInBits(32); @@ -1653,15 +1651,9 @@ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal); - // Custom Lower Atomic LOAD/STORE - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); - if (Subtarget->is64Bit()) { setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Legal); setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Legal); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); } if (!Subtarget->isV9()) { @@ -2996,15 +2988,6 @@ return DAG.getMergeValues(Ops, dl); } -static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { - if (isStrongerThanMonotonic(cast(Op)->getOrdering())) - // Expand with a fence. - return SDValue(); - - // Monotonic load/stores are legal. 
- return Op; -} - SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -3076,8 +3059,6 @@ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::UMULO: case ISD::SMULO: return LowerUMULO_SMULO(Op, DAG, *this); - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); } } Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -128,6 +128,8 @@ // Instructions are strings of 2-byte aligned 2-byte values. setMinFunctionAlignment(2); + setMaxAtomicSizeInBitsSupported(64); + // Handle operations that are handled in a similar way for all types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; Index: lib/Target/TargetSubtargetInfo.cpp =================================================================== --- lib/Target/TargetSubtargetInfo.cpp +++ lib/Target/TargetSubtargetInfo.cpp @@ -28,10 +28,6 @@ TargetSubtargetInfo::~TargetSubtargetInfo() {} -bool TargetSubtargetInfo::enableAtomicExpand() const { - return true; -} - bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -49,6 +49,11 @@ // Tell ISel that we have a stack pointer. setStackPointerRegisterToSaveRestore( Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); + // Maximum atomics size + if (Subtarget->hasAddr64()) + setMaxAtomicSizeInBitsSupported(64); + else + setMaxAtomicSizeInBitsSupported(32); // Set up the register classes. addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -84,6 +84,17 @@ // X86-SSE is even stranger. It uses -1 or 0 for vector masks. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + if (Subtarget.is64Bit()) { + if (Subtarget.hasCmpxchg16b()) + setMaxAtomicSizeInBitsSupported(128); + else + setMaxAtomicSizeInBitsSupported(64); + } else { + // FIXME: Check that we actually have cmpxchg (i486 or later) + // FIXME: Check that we actually have cmpxchg8b (i586 or later) + setMaxAtomicSizeInBitsSupported(64); + } + // For 64-bit, since we have so many registers, use the ILP scheduler. // For 32-bit, use the register pressure specific scheduling. // For Atom, always use ILP scheduling. @@ -20417,32 +20428,27 @@ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } -/// Returns true if the operand type is exactly twice the native width, and -/// the corresponding cmpxchg8b or cmpxchg16b instruction is available. -/// Used to know whether to use cmpxchg8/16b when expanding atomic operations -/// (otherwise we leave them alone to become __sync_fetch_and_... calls). 
-bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { - unsigned OpWidth = MemType->getPrimitiveSizeInBits(); - - if (OpWidth == 64) - return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b - else if (OpWidth == 128) - return Subtarget.hasCmpxchg16b(); - else - return false; -} +// Atomic operations larger than the normal register size can only be +// done with cmpxchg8b/16b, so expand loads/stores to cmpxchg if +// required. +// (Note: we don't need to worry about those instructions not being +// available, because larger-than-supported IR instructions will +// already have been transformed into __atomic_* libcalls if needed) bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - return needsCmpXchgNb(SI->getValueOperand()->getType()); + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > + NativeWidth; } // Note: this turns large loads into lock cmpxchg8b/16b. // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b. TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - auto PTy = cast(LI->getPointerOperand()->getType()); - return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; + return (LI->getType()->getPrimitiveSizeInBits() > NativeWidth) + ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind @@ -20450,12 +20456,9 @@ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; Type *MemType = AI->getType(); - // If the operand is too big, we must see if cmpxchg8/16b is available - // and default to library calls otherwise. - if (MemType->getPrimitiveSizeInBits() > NativeWidth) { - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; - } + // If the operand is too big, we need to use cmpxchg8b/16b. + if (MemType->getPrimitiveSizeInBits() > NativeWidth) + return AtomicExpansionKind::CmpXChg; AtomicRMWInst::BinOp Op = AI->getOperation(); switch (Op) { @@ -20616,7 +20619,7 @@ DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1)); - return SDValue(); + return Op; } static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, @@ -21091,7 +21094,7 @@ // RAUW the chain, but don't worry about the result, as it's unused. 
assert(!N->hasAnyUseOfValue(0)); DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1)); - return SDValue(); + return LockOp; } static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { Index: lib/Target/XCore/XCoreISelLowering.h =================================================================== --- lib/Target/XCore/XCoreISelLowering.h +++ lib/Target/XCore/XCoreISelLowering.h @@ -185,8 +185,6 @@ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair @@ -225,9 +223,6 @@ bool isVarArg, const SmallVectorImpl &ArgsFlags, LLVMContext &Context) const override; - bool shouldInsertFencesForAtomic(const Instruction *I) const override { - return true; - } }; } Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -151,12 +151,7 @@ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); - // Atomic operations - // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. - // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); // TRAMPOLINE is custom lowered. setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); @@ -222,8 +217,6 @@ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); - case ISD::ATOMIC_LOAD: return LowerATOMIC_LOAD(Op, DAG); - case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG); default: llvm_unreachable("unimplemented operand"); } @@ -963,68 +956,6 @@ return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -SDValue XCoreTargetLowering:: -LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *N = cast(Op); - assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); - assert((N->getOrdering() == AtomicOrdering::Unordered || - N->getOrdering() == AtomicOrdering::Monotonic) && - "setInsertFencesForAtomic(true) expects unordered / monotonic"); - if (N->getMemoryVT() == MVT::i32) { - if (N->getAlignment() < 4) - report_fatal_error("atomic load must be aligned"); - return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), - N->getChain(), N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), N->isInvariant(), - N->getAlignment(), N->getAAInfo(), N->getRanges()); - } - if (N->getMemoryVT() == MVT::i16) { - if (N->getAlignment() < 2) - report_fatal_error("atomic load must be aligned"); - return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), - N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getAlignment(), N->getAAInfo()); - } - if (N->getMemoryVT() == MVT::i8) - return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), - N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), 
N->getAlignment(), N->getAAInfo()); - return SDValue(); -} - -SDValue XCoreTargetLowering:: -LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *N = cast(Op); - assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); - assert((N->getOrdering() == AtomicOrdering::Unordered || - N->getOrdering() == AtomicOrdering::Monotonic) && - "setInsertFencesForAtomic(true) expects unordered / monotonic"); - if (N->getMemoryVT() == MVT::i32) { - if (N->getAlignment() < 4) - report_fatal_error("atomic store must be aligned"); - return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), - N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); - } - if (N->getMemoryVT() == MVT::i16) { - if (N->getAlignment() < 2) - report_fatal_error("atomic store must be aligned"); - return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), - N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); - } - if (N->getMemoryVT() == MVT::i8) - return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), - N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); - return SDValue(); -} - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -1,27 +1,21 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM -; RUN: llc < %s -mtriple=thumb-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB +; RUN: llc < %s -mtriple=arm-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM -check-prefix=CHECK +; RUN: llc < %s -mtriple=thumb-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB -check-prefix=CHECK -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV6 -; RUN: llc < %s -mtriple=thumbv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV6 +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV6 -check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV6 -check-prefix=CHECK -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7 -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7 -check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7 -check-prefix=CHECK define zeroext i1 @test_cmpxchg_res_i8(i8* %addr, i8 %desired, i8 zeroext %new) { -entry: - %0 = cmpxchg i8* %addr, i8 %desired, i8 %new monotonic monotonic - %1 = extractvalue { i8, i1 } 
%0, 1
-  ret i1 %1
-}
+; CHECK-LABEL: test_cmpxchg_res_i8:

-; CHECK-ARM-LABEL: test_cmpxchg_res_i8
 ; CHECK-ARM: bl __sync_val_compare_and_swap_1
 ; CHECK-ARM: mov [[REG:r[0-9]+]], #0
 ; CHECK-ARM: cmp r0, {{r[0-9]+}}
 ; CHECK-ARM: moveq [[REG]], #1
 ; CHECK-ARM: mov r0, [[REG]]

-; CHECK-THUMB-LABEL: test_cmpxchg_res_i8
 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1
 ; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0
 ; CHECK-THUMB: push {r0}
@@ -33,7 +27,6 @@
 ; CHECK-THUMB: push {[[R2]]}
 ; CHECK-THUMB: pop {r0}

-; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8:
 ; CHECK-ARMV6-NEXT: .fnstart
 ; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
 ; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
@@ -49,7 +42,6 @@
 ; CHECK-ARMV6-NEXT: mov r0, [[RES]]
 ; CHECK-ARMV6-NEXT: bx lr

-; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
 ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
 ; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
 ; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
@@ -61,7 +53,6 @@
 ; CHECK-THUMBV6-NEXT: [[END]]:
 ; CHECK-THUMBV6-NEXT: pop {{.*}}pc}

-; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-ARMV7-NEXT: .fnstart
 ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
 ; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
@@ -80,7 +71,6 @@
 ; CHECK-ARMV7-NEXT: mov r0, [[RES]]
 ; CHECK-ARMV7-NEXT: bx lr

-; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-THUMBV7-NEXT: .fnstart
 ; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
 ; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
@@ -97,3 +87,31 @@
 ; CHECK-THUMBV7-NEXT: clrex
 ; CHECK-THUMBV7-NEXT: movs r0, #0
 ; CHECK-THUMBV7-NEXT: bx lr
+
+entry:
+  %0 = cmpxchg i8* %addr, i8 %desired, i8 %new monotonic monotonic
+  %1 = extractvalue { i8, i1 } %0, 1
+  ret i1 %1
+}
+
+
+
+;; Also ensure that i64s are inlined or turned into a libcall, as appropriate.
+define zeroext i1 @test_cmpxchg_res_i64(i64* %addr, i64 %desired, i64 zeroext %new) {
+; CHECK-LABEL: test_cmpxchg_res_i64:
+
+; CHECK-ARM: __sync_val_compare_and_swap_8
+; CHECK-THUMB: __sync_val_compare_and_swap_8
+; CHECK-ARMV6: ldrexd
+; CHECK-ARMV6: strexd
+; CHECK-THUMBV6: __sync_val_compare_and_swap_8
+; CHECK-ARMV7: ldrexd
+; CHECK-ARMV7: strexd
+; CHECK-THUMBV7: ldrexd
+; CHECK-THUMBV7: strexd
+
+entry:
+  %0 = cmpxchg i64* %addr, i64 %desired, i64 %new monotonic monotonic
+  %1 = extractvalue { i64, i1 } %0, 1
+  ret i1 %1
+}
Index: test/CodeGen/ARM/atomic-load-store.ll
===================================================================
--- test/CodeGen/ARM/atomic-load-store.ll
+++ test/CodeGen/ARM/atomic-load-store.ll
@@ -12,7 +12,9 @@
 ; ARM-NEXT: str
 ; ARM-NEXT: dmb {{ish$}}
 ; THUMBONE-LABEL: test1
-; THUMBONE: __sync_lock_test_and_set_4
+; THUMBONE: ___sync_synchronize
+; THUMBONE-NEXT: str
+; THUMBONE-NEXT: ___sync_synchronize
 ; THUMBTWO-LABEL: test1
 ; THUMBTWO: dmb {{ish$}}
 ; THUMBTWO-NEXT: str
@@ -34,7 +36,8 @@
 ; ARM: ldr
 ; ARM-NEXT: dmb {{ish$}}
 ; THUMBONE-LABEL: test2
-; THUMBONE: __sync_val_compare_and_swap_4
+; THUMBONE: ldr
+; THUMBONE: __sync_synchronize
 ; THUMBTWO-LABEL: test2
 ; THUMBTWO: ldr
 ; THUMBTWO-NEXT: dmb {{ish$}}
@@ -83,8 +86,11 @@
 define void @test4(i8* %ptr1, i8* %ptr2) {
 ; THUMBONE-LABEL: test4
-; THUMBONE: ___sync_val_compare_and_swap_1
-; THUMBONE: ___sync_lock_test_and_set_1
+; THUMBONE: ldrb
+; THUMBONE-NEXT: ___sync_synchronize
+; THUMBONE-NEXT: ___sync_synchronize
+; THUMBONE-NEXT: strb
+; THUMBONE-NEXT: ___sync_synchronize
 ; ARMV6-LABEL: test4
 ; THUMBM-LABEL: test4
 %val = load atomic i8, i8* %ptr1 seq_cst, align 1
Index: test/CodeGen/ARM/atomic-op.ll
===================================================================
---
test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -7,6 +7,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" define void @func(i32 %argc, i8** %argv) nounwind { +; CHECK-LABEL: func: entry: %argc.addr = alloca i32 ; [#uses=1] %argv.addr = alloca i8** ; [#uses=1] @@ -153,6 +154,7 @@ } define void @func2() nounwind { +; CHECK-LABEL: func2: entry: %val = alloca i16 %old = alloca i16 @@ -194,6 +196,7 @@ } define void @func3() nounwind { +; CHECK-LABEL: func3: entry: %val = alloca i8 %old = alloca i8 @@ -234,7 +237,7 @@ ret void } -; CHECK: func4 +; CHECK-LABEL: func4: ; This function should not need to use callee-saved registers. ; rdar://problem/12203728 ; CHECK-NOT: r4 @@ -246,7 +249,6 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) { ; CHECK-LABEL: test_cmpxchg_fail_order: - %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 ; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] @@ -286,7 +288,6 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { ; CHECK-LABEL: test_cmpxchg_fail_order1: - %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire %oldval = extractvalue { i32, i1 } %pair, 0 ; CHECK-NOT: dmb ish @@ -308,7 +309,7 @@ } define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind { -; CHECK-LABEL: load_load_add_acquire +; CHECK-LABEL: load_load_add_acquire: %val1 = load atomic i32, i32* %mem1 acquire, align 4 %val2 = load atomic i32, i32* %mem2 acquire, align 4 %tmp = add i32 %val1, %val2 @@ -332,7 +333,7 @@ } define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) { -; CHECK-LABEL: store_store_release +; CHECK-LABEL: store_store_release: store atomic i32 %val1, i32* %mem1 release, align 4 store atomic i32 %val2, i32* %mem2 release, align 4 @@ -341,19 +342,21 @@ ; CHECK: dmb ; CHECK: str r3, [r2] -; CHECK-T1: ___sync_lock_test_and_set -; CHECK-T1: ___sync_lock_test_and_set +; CHECK-M0: dmb +; CHECK-M0: str r1, [r0] +; CHECK-M0: dmb +; CHECK-M0: str r3, [r2] ; CHECK-BAREMETAL-NOT: dmb -; CHECK-BAREMTEAL: str r1, [r0] +; CHECK-BAREMETAL: str r1, [r0] ; CHECK-BAREMETAL-NOT: dmb -; CHECK-BAREMTEAL: str r3, [r2] +; CHECK-BAREMETAL: str r3, [r2] ret void } define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) { -; CHECK-LABEL: load_fence_store_monotonic +; CHECK-LABEL: load_fence_store_monotonic: %val = load atomic i32, i32* %mem1 monotonic, align 4 fence seq_cst store atomic i32 %val, i32* %mem2 monotonic, align 4 Index: test/CodeGen/PowerPC/atomics-indexed.ll =================================================================== --- test/CodeGen/PowerPC/atomics-indexed.ll +++ test/CodeGen/PowerPC/atomics-indexed.ll @@ -34,8 +34,8 @@ } define i64 @load_x_i64_unordered([100000 x i64]* %mem) { ; CHECK-LABEL: load_x_i64_unordered -; PPC32: __sync_ -; PPC64-NOT: __sync_ +; PPC32: __atomic_ +; PPC64-NOT: __atomic_ ; PPC64: ldx ; CHECK-NOT: sync %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 @@ -71,8 +71,8 @@ define void @store_x_i64_unordered([100000 x i64]* %mem) { ; CHECK-LABEL: store_x_i64_unordered ; CHECK-NOT: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ +; PPC32: __atomic_ +; PPC64-NOT: __atomic_ ; PPC64: stdx %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 store atomic i64 42, i64* %ptr unordered, align 8 Index: test/CodeGen/PowerPC/atomics.ll 
=================================================================== --- test/CodeGen/PowerPC/atomics.ll +++ test/CodeGen/PowerPC/atomics.ll @@ -32,12 +32,12 @@ } define i64 @load_i64_seq_cst(i64* %mem) { ; CHECK-LABEL: load_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ +; PPC32: __atomic_ +; PPC64-NOT: __atomic_ +; PPC64: sync ; PPC64: ld %val = load atomic i64, i64* %mem seq_cst, align 8 -; CHECK: lwsync +; PPC64: lwsync ret i64 %val } @@ -65,9 +65,9 @@ } define void @store_i64_seq_cst(i64* %mem) { ; CHECK-LABEL: store_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ +; PPC32: __atomic_ +; PPC64-NOT: __atomic_ +; PPC64: sync ; PPC64: std store atomic i64 42, i64* %mem seq_cst, align 8 ret void @@ -100,7 +100,8 @@ } define i64 @cas_weak_i64_release_monotonic(i64* %mem) { ; CHECK-LABEL: cas_weak_i64_release_monotonic -; CHECK: lwsync +; PPC32: __atomic_ +; PPC64: lwsync %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic ; CHECK-NOT: [sync ] %loaded = extractvalue { i64, i1} %val, 0 @@ -130,7 +131,8 @@ } define i64 @and_i64_release(i64* %mem, i64 %operand) { ; CHECK-LABEL: and_i64_release -; CHECK: lwsync +; PPC32: __atomic_ +; PPC64: lwsync %val = atomicrmw and i64* %mem, i64 %operand release ; CHECK-NOT: [sync ] ret i64 %val Index: test/CodeGen/X86/atomic-non-integer.ll =================================================================== --- test/CodeGen/X86/atomic-non-integer.ll +++ test/CodeGen/X86/atomic-non-integer.ll @@ -34,7 +34,7 @@ define void @store_fp128(fp128* %fptr, fp128 %v) { ; CHECK-LABEL: @store_fp128 -; CHECK: callq __sync_lock_test_and_set_16 +; CHECK: callq __atomic_store_16 store atomic fp128 %v, fp128* %fptr unordered, align 16 ret void } @@ -66,7 +66,7 @@ define fp128 @load_fp128(fp128* %fptr) { ; CHECK-LABEL: @load_fp128 -; CHECK: callq __sync_val_compare_and_swap_16 +; CHECK: callq __atomic_load_16 %v = load atomic fp128, fp128* %fptr unordered, align 16 ret fp128 %v } Index: test/CodeGen/X86/nocx16.ll =================================================================== --- test/CodeGen/X86/nocx16.ll +++ test/CodeGen/X86/nocx16.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck %s define void @test(i128* %a) nounwind { entry: -; CHECK: __sync_val_compare_and_swap_16 +; CHECK: __atomic_compare_exchange_16 %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst -; CHECK: __sync_lock_test_and_set_16 +; CHECK: __atomic_exchange_16 %1 = atomicrmw xchg i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_add_16 +; CHECK: __atomic_fetch_add_16 %2 = atomicrmw add i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_sub_16 +; CHECK: __atomic_fetch_sub_16 %3 = atomicrmw sub i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_and_16 +; CHECK: __atomic_fetch_and_16 %4 = atomicrmw and i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_nand_16 +; CHECK: __atomic_fetch_nand_16 %5 = atomicrmw nand i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_or_16 +; CHECK: __atomic_fetch_or_16 %6 = atomicrmw or i128* %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_xor_16 +; CHECK: __atomic_fetch_xor_16 %7 = atomicrmw xor i128* %a, i128 1 seq_cst ret void } Index: test/CodeGen/XCore/atomic.ll =================================================================== --- test/CodeGen/XCore/atomic.ll +++ test/CodeGen/XCore/atomic.ll @@ -21,71 +21,11 @@ entry: ; CHECK-LABEL: atomicloadstore -; CHECK: ldw r[[R0:[0-9]+]], dp[pool] -; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool] -; CHECK-NEXT: #MEMBARRIER -; CHECK-NEXT: ldc 
r[[R2:[0-9]+]], 0
+; CHECK: bl __atomic_load_4
   %0 = load atomic i32, i32* bitcast (i64* @pool to i32*) acquire, align 4
-; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  %1 = load atomic i16, i16* bitcast (i64* @pool to i16*) acquire, align 2
-
-; CHECK-NEXT: ld8u r11, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  %2 = load atomic i8, i8* bitcast (i64* @pool to i8*) acquire, align 1
-
-; CHECK-NEXT: ldw r4, dp[pool]
-; CHECK-NEXT: #MEMBARRIER
-  %3 = load atomic i32, i32* bitcast (i64* @pool to i32*) seq_cst, align 4
-
-; CHECK-NEXT: ld16s r5, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  %4 = load atomic i16, i16* bitcast (i64* @pool to i16*) seq_cst, align 2
-
-; CHECK-NEXT: ld8u r6, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  %5 = load atomic i8, i8* bitcast (i64* @pool to i8*) seq_cst, align 1
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: stw r[[R0]], dp[pool]
-  store atomic i32 %0, i32* bitcast (i64* @pool to i32*) release, align 4
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: st16 r3, r[[R1]][r[[R2]]]
-  store atomic i16 %1, i16* bitcast (i64* @pool to i16*) release, align 2
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: st8 r11, r[[R1]][r[[R2]]]
-  store atomic i8 %2, i8* bitcast (i64* @pool to i8*) release, align 1
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: stw r4, dp[pool]
-; CHECK-NEXT: #MEMBARRIER
-  store atomic i32 %3, i32* bitcast (i64* @pool to i32*) seq_cst, align 4
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: st16 r5, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  store atomic i16 %4, i16* bitcast (i64* @pool to i16*) seq_cst, align 2
-
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: st8 r6, r[[R1]][r[[R2]]]
-; CHECK-NEXT: #MEMBARRIER
-  store atomic i8 %5, i8* bitcast (i64* @pool to i8*) seq_cst, align 1
-
-; CHECK-NEXT: ldw r[[R0]], dp[pool]
-; CHECK-NEXT: stw r[[R0]], dp[pool]
-; CHECK-NEXT: ld16s r[[R0]], r[[R1]][r[[R2]]]
-; CHECK-NEXT: st16 r[[R0]], r[[R1]][r[[R2]]]
-; CHECK-NEXT: ld8u r[[R0]], r[[R1]][r[[R2]]]
-; CHECK-NEXT: st8 r[[R0]], r[[R1]][r[[R2]]]
-  %6 = load atomic i32, i32* bitcast (i64* @pool to i32*) monotonic, align 4
-  store atomic i32 %6, i32* bitcast (i64* @pool to i32*) monotonic, align 4
-  %7 = load atomic i16, i16* bitcast (i64* @pool to i16*) monotonic, align 2
-  store atomic i16 %7, i16* bitcast (i64* @pool to i16*) monotonic, align 2
-  %8 = load atomic i8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
-  store atomic i8 %8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
+; CHECK: bl __atomic_store_2
+  store atomic i16 5, i16* bitcast (i64* @pool to i16*) release, align 2
   ret void
 }
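On the lowering side, the test updates above all follow from the same mechanism: once a target has opted in with initSyncLibcalls(), the libcall machinery can look the __sync_* routine names up as usual, and the new null-symbol check in SelectionDAG::getExternalSymbol() turns a forgotten opt-in into a clear fatal error rather than a crash on a null name. A rough sketch follows; lowerAtomicAddToSyncLibcall is an invented helper name, and threading of the atomic node's memory chain is elided for brevity, so this is illustrative rather than production lowering code.

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper, for illustration only: lower an i32 ATOMIC_LOAD_ADD
// node to the __sync_fetch_and_add_4 libcall.
static SDValue lowerAtomicAddToSyncLibcall(SDNode *N, SelectionDAG &DAG,
                                           const TargetLowering &TLI) {
  SDLoc dl(N);
  // ATOMIC_LOAD_ADD operands are (chain, pointer, value); pass ptr and value.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
  // makeLibCall resolves the callee via getLibcallName(), which returns null
  // unless the target called initSyncLibcalls(); with this patch a null name
  // is reported by getExternalSymbol() as a fatal error.
  return TLI.makeLibCall(DAG, RTLIB::SYNC_FETCH_AND_ADD_4, MVT::i32, Ops,
                         /*isSigned=*/false, dl)
      .first;
}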