diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -491,6 +491,10 @@
   "The '%0' architecture does not support -moutline; flag ignored">,
   InGroup<OptionIgnored>;
 
+def warn_drv_moutline_atomics_unsupported_opt : Warning<
+  "The '%0' architecture does not support -moutline-atomics; flag ignored">,
+  InGroup<OptionIgnored>;
+
 def warn_drv_darwin_sdk_invalid_settings : Warning<
   "SDK settings were ignored as 'SDKSettings.json' could not be parsed">,
   InGroup<DiagGroup<"darwin-sdk-settings">>;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2612,6 +2612,10 @@
   Group<m_Group>, Alias<G>;
 def msoft_float : Flag<["-"], "msoft-float">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Use software floating point">;
+def moutline_atomics : Flag<["-"], "moutline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Generate local calls to out-of-line atomic operations">;
+def mno_outline_atomics : Flag<["-"], "mno-outline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Don't generate local calls to out-of-line atomic operations">;
 def mno_implicit_float : Flag<["-"], "mno-implicit-float">, Group<m_Group>,
   HelpText<"Don't generate implicit floating point instructions">;
 def mimplicit_float : Flag<["-"], "mimplicit-float">, Group<m_Group>;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6359,6 +6359,27 @@
     }
   }
 
+  if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
+                               options::OPT_mno_outline_atomics)) {
+    // The "outline-atomics" feature only exists in the AArch64 backend, which
+    // also serves the aarch64_be and aarch64_32 architectures.
+    const llvm::Triple::ArchType Arch = Triple.getArch();
+    const bool IsAArch64 = Arch == llvm::Triple::aarch64 ||
+                           Arch == llvm::Triple::aarch64_be ||
+                           Arch == llvm::Triple::aarch64_32;
+    const bool Enable = A->getOption().matches(options::OPT_moutline_atomics);
+    if (IsAArch64) {
+      CmdArgs.push_back("-target-feature");
+      CmdArgs.push_back(Enable ? "+outline-atomics" : "-outline-atomics");
+    } else if (Enable) {
+      // Enabling it elsewhere is diagnosed and the flag is dropped.
+      D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
+          << Triple.getArchName();
+    }
+    // -mno-outline-atomics on any other target is a no-op: forwarding
+    // "-outline-atomics" would name a feature that target does not know.
+  }
+
   if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
                    (TC.getTriple().isOSBinFormatELF() ||
                     TC.getTriple().isOSBinFormatCOFF()) &&
diff --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst
--- a/llvm/docs/Atomics.rst
+++ b/llvm/docs/Atomics.rst
@@ -621,3 +621,26 @@
 There's also, somewhat separately, the possibility to lower ``ATOMIC_FENCE`` to
 ``__sync_synchronize()``. This may happen or not happen independent of all the
 above, controlled purely by ``setOperationAction(ISD::ATOMIC_FENCE, ...)``.
+
+Libcalls: out-of-line atomics
+=============================
+
+The out-of-line atomics mechanism can be used to deploy architecture-supported
+atomic instructions (AArch64 Large System Extensions (LSE) so far). LLVM
+expands a set of atomic operations to library function calls. These helper
+functions determine at runtime if suitable atomic instructions are available
+and utilize them, otherwise use atomic loads and stores.
The following helper functions are
+implemented for AArch64 in both ``compiler-rt`` and ``libgcc`` libraries
+(``N`` is one of 1, 2, 4, 8, and ``M`` is one of 1, 2, 4, 8 and 16, and
+``ORDER`` is one of 'relax', 'acq', 'rel', 'acq_rel')::
+
+  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+
+Please note that if support for the atomic instructions is already enabled for
+the target (``+lse`` in the AArch64 case), out-of-line atomics calls are not
+generated and the atomic instructions are used in place.
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
--- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -15,6 +15,7 @@
 #define LLVM_CODEGEN_RUNTIMELIBCALLS_H
 
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/AtomicOrdering.h"
 
 namespace llvm {
 namespace RTLIB {
@@ -60,6 +61,10 @@
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getSYNC(unsigned Opc, MVT VT);
 
+  /// Return the outline atomics value for the given opcode, atomic ordering
+  /// and type, or UNKNOWN_LIBCALL if there is none.
+  Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
+
   /// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return
   /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
   /// UNKNOW_LIBCALL if there is none.
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -544,6 +544,26 @@
 HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8")
 HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16")
 
+// Out-of-line atomics libcalls
+#define HLCALLS(A, N) \
+  HANDLE_LIBCALL(A##N##_RELAX, nullptr) \
+  HANDLE_LIBCALL(A##N##_ACQ, nullptr) \
+  HANDLE_LIBCALL(A##N##_REL, nullptr) \
+  HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr)
+#define HLCALL4(A) \
+  HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8)
+#define HLCALL5(A) \
+  HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16)
+HLCALL5(ATOMIC_CAS)
+HLCALL4(ATOMIC_SWP)
+HLCALL4(ATOMIC_LDADD)
+HLCALL4(ATOMIC_LDSET)
+HLCALL4(ATOMIC_LDCLR)
+HLCALL4(ATOMIC_LDEOR)
+#undef HLCALLS
+#undef HLCALL4
+#undef HLCALL5
+
 // Stack Protector Fail
 HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1815,7 +1815,7 @@
   // later than DestVT.
   SDValue Store;
 
-  if (SrcSize > SlotSize) 
+  if (SrcSize > SlotSize)
     Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
                               SlotVT, SrcAlign);
   else {
@@ -3017,7 +3017,7 @@
       break;
     // We fall back to use stack operation when the FP_ROUND operation
     // isn't available.
-    Tmp1 = EmitStackConvert(Node->getOperand(1), 
+    Tmp1 = EmitStackConvert(Node->getOperand(1),
                             Node->getValueType(0),
                             Node->getValueType(0), dl, Node->getOperand(0));
     ReplaceNode(Node, Tmp1.getNode());
@@ -3025,7 +3025,7 @@
     return true;
   case ISD::FP_ROUND:
   case ISD::BITCAST:
-    Tmp1 = EmitStackConvert(Node->getOperand(0), 
+    Tmp1 = EmitStackConvert(Node->getOperand(0),
                             Node->getValueType(0),
                             Node->getValueType(0), dl);
     Results.push_back(Tmp1);
@@ -4054,12 +4054,24 @@
   case ISD::ATOMIC_LOAD_UMAX:
   case ISD::ATOMIC_CMP_SWAP: {
     MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
-    RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
-    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+    AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+    RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
     EVT RetVT = Node->getValueType(0);
-    SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
     TargetLowering::MakeLibCallOptions CallOptions;
+    SmallVector<SDValue, 4> Ops;
+    if (TLI.getLibcallName(LC)) {
+      // An outline-atomic helper is registered for this operation. Its
+      // arguments are the value operand(s) followed by the pointer, so
+      // operand 1 is moved to the end of the argument list.
+      Ops.append(Node->op_begin() + 2, Node->op_end());
+      Ops.push_back(Node->getOperand(1));
+    } else {
+      LC = RTLIB::getSYNC(Opc, VT);
+      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+             "Unexpected atomic op or value type!");
+      // Arguments for expansion to sync libcall
+      Ops.append(Node->op_begin() + 1, Node->op_end());
+    }
     std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
                                                       Ops, CallOptions,
                                                       SDLoc(Node),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -575,7 +575,7 @@
 
   SDValue Res;
   if (N->isStrictFPOpcode()) {
-    Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, 
+    Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
                       { N->getOperand(0), N->getOperand(1) });
     // Legalize the chain result - switch anything that used the old chain to
     // use the new one.
@@ -1260,7 +1260,7 @@
 }
 
 // Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
-// the third operand of ADDE/SUBE nodes is carry flag, which differs from 
+// the third operand of ADDE/SUBE nodes is carry flag, which differs from
 // the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
   if (ResNo == 1)
@@ -2160,12 +2160,22 @@
 std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
   unsigned Opc = Node->getOpcode();
   MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
-  RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
-  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+  AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+  // Lower to an outline atomic libcall if one is registered for this
+  // operation, or to a sync libcall otherwise.
+  RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
   EVT RetVT = Node->getValueType(0);
-  SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
   TargetLowering::MakeLibCallOptions CallOptions;
+  SmallVector<SDValue, 4> Ops;
+  if (TLI.getLibcallName(LC)) {
+    Ops.append(Node->op_begin() + 2, Node->op_end());
+    Ops.push_back(Node->getOperand(1));
+  } else {
+    LC = RTLIB::getSYNC(Opc, VT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+           "Unexpected atomic op or value type!");
+    Ops.append(Node->op_begin() + 1, Node->op_end());
+  }
   return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
                          Node->getOperand(0));
 }
@@ -3087,7 +3097,7 @@
     ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
     return;
   }
-  
+
   if (ISD::isNormalLoad(N)) {
     ExpandRes_NormalLoad(N, Lo, Hi);
     return;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -446,6 +446,101 @@
   return UNKNOWN_LIBCALL;
 }
 
+// Map an atomic opcode, memory ordering and access width to the matching
+// out-of-line atomic helper, or UNKNOWN_LIBCALL when no helper covers it.
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+                                        MVT VT) {
+  struct OutlineAtomicsLibcalls {
+    Libcall LC[5][4];
+  };
+  unsigned ModeN, ModelN;
+  const OutlineAtomicsLibcalls *Lcalls;
+#define LCALLS(A, B) \
+  { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL4(A) \
+  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), { \
+    UNKNOWN_LIBCALL, UNKNOWN_LIBCALL, UNKNOWN_LIBCALL, UNKNOWN_LIBCALL \
+  }
+#define LCALL5(A) \
+  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+  static const OutlineAtomicsLibcalls CasLcalls = {{LCALL5(ATOMIC_CAS)}};
+  static const OutlineAtomicsLibcalls SwpLcalls = {{LCALL4(ATOMIC_SWP)}};
+  static const OutlineAtomicsLibcalls LdaddLcalls = {{LCALL4(ATOMIC_LDADD)}};
+  static const OutlineAtomicsLibcalls LdsetLcalls = {{LCALL4(ATOMIC_LDSET)}};
+  static const OutlineAtomicsLibcalls LdclrLcalls = {{LCALL4(ATOMIC_LDCLR)}};
+  static const OutlineAtomicsLibcalls LdeorLcalls = {{LCALL4(ATOMIC_LDEOR)}};
+#undef LCALLS
+#undef LCALL4
+#undef LCALL5
+  switch (Opc) {
+  case ISD::ATOMIC_CMP_SWAP: {
+    Lcalls = &CasLcalls;
+    break;
+  }
+  case ISD::ATOMIC_SWAP: {
+    Lcalls = &SwpLcalls;
+    break;
+  }
+  case ISD::ATOMIC_LOAD_ADD: {
+    Lcalls = &LdaddLcalls;
+    break;
+  }
+  case ISD::ATOMIC_LOAD_OR: {
+    Lcalls = &LdsetLcalls;
+    break;
+  }
+  case ISD::ATOMIC_LOAD_CLR: {
+    Lcalls = &LdclrLcalls;
+    break;
+  }
+  case ISD::ATOMIC_LOAD_XOR: {
+    Lcalls = &LdeorLcalls;
+    break;
+  }
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    ModeN = 0;
+    break;
+  case MVT::i16:
+    ModeN = 1;
+    break;
+  case MVT::i32:
+    ModeN = 2;
+    break;
+  case MVT::i64:
+    ModeN = 3;
+    break;
+  case MVT::i128:
+    ModeN = 4;
+    break;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+
+  switch (Order) {
+  case AtomicOrdering::Monotonic:
+    ModelN = 0;
+    break;
+  case AtomicOrdering::Acquire:
+    ModelN = 1;
+    break;
+  case AtomicOrdering::Release:
+    ModelN = 2;
+    break;
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    ModelN = 3;
+    break;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+  return Lcalls->LC[ModeN][ModelN];
+}
+
 RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
 #define OP_TO_LIBCALL(Name, Enum) \
   case Name: \
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,9 @@
 def FeatureLSE :
SubtargetFeature<"lse", "HasLSE", "true",
   "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
 
+def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
+  "Enable out of line atomics to support LSE instructions">;
+
 def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
   "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -660,6 +660,55 @@
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
 
+  if (Subtarget->outlineAtomics()) {
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
+#define LCALLNAMES(A, B, N) \
+  setLibcallName(A##N##_RELAX, #B #N "_relax"); \
+  setLibcallName(A##N##_ACQ, #B #N "_acq"); \
+  setLibcallName(A##N##_REL, #B #N "_rel"); \
+  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
+#define LCALLNAME4(A, B) \
+  LCALLNAMES(A, B, 1) \
+  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
+#define LCALLNAME5(A, B) \
+  LCALLNAMES(A, B, 1) \
+  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) \
+  LCALLNAMES(A, B, 16)
+    LCALLNAME5(RTLIB::ATOMIC_CAS, __aarch64_cas)
+    LCALLNAME4(RTLIB::ATOMIC_SWP, __aarch64_swp)
+    LCALLNAME4(RTLIB::ATOMIC_LDADD, __aarch64_ldadd)
+    LCALLNAME4(RTLIB::ATOMIC_LDSET, __aarch64_ldset)
+    LCALLNAME4(RTLIB::ATOMIC_LDCLR, __aarch64_ldclr)
+    LCALLNAME4(RTLIB::ATOMIC_LDEOR, __aarch64_ldeor)
+#undef LCALLNAMES
+#undef LCALLNAME4
+#undef LCALLNAME5
+  }
+
   // 128-bit loads and stores can be done without expanding
   setOperationAction(ISD::LOAD, MVT::i128, Custom);
   setOperationAction(ISD::STORE, MVT::i128, Custom);
@@ -9894,7 +9943,7 @@
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
-  if (!Subtarget.hasLSE())
+  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
     return SDValue();
 
   // LSE has an atomic load-add instruction, but not a load-sub.
@@ -9911,7 +9960,7 @@
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
-  if (!Subtarget.hasLSE())
+  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
     return SDValue();
 
   // LSE has an atomic load-clear instruction, but not a load-and.
@@ -15365,7 +15414,7 @@
   assert(N->getValueType(0) == MVT::i128 &&
          "AtomicCmpSwap on types less than 128 should be legal");
 
-  if (Subtarget->hasLSE()) {
+  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
     // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
     // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
     SDValue Ops[] = {
@@ -15594,14 +15643,30 @@
   // Nand not supported in LSE.
   if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
   // Leave 128 bits to LLSC.
-  return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
+  if (Subtarget->hasLSE() && Size < 128)
+    return AtomicExpansionKind::None;
+  if (Subtarget->outlineAtomics() && Size < 128) {
+    // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
+    // Don't outline them unless
+    // (1) high level <atomic> support approved:
+    //   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+    // (2) low level libgcc and compiler-rt support implemented by:
+    //   min/max outline atomics helpers
+    if (AI->getOperation() != AtomicRMWInst::Min &&
+        AI->getOperation() != AtomicRMWInst::Max &&
+        AI->getOperation() != AtomicRMWInst::UMin &&
+        AI->getOperation() != AtomicRMWInst::UMax) {
+      return AtomicExpansionKind::None;
+    }
+  }
+  return AtomicExpansionKind::LLSC;
 }
 
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  // If subtarget has LSE, leave cmpxchg intact for codegen.
-  if (Subtarget->hasLSE())
+  // If subtarget has LSE or outline atomics, leave cmpxchg intact for codegen.
+  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
     return AtomicExpansionKind::None;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -195,6 +195,7 @@
   // Enable 64-bit vectorization in SLP.
   unsigned MinVectorRegisterBitWidth = 64;
 
+  bool OutlineAtomics = false;
   bool UseAA = false;
   bool PredictableSelectIsExpensive = false;
   bool BalanceFPOps = false;
@@ -471,6 +472,12 @@
 
   bool useAA() const override { return UseAA; }
 
+  bool outlineAtomics() const {
+    // Don't outline atomics if
+    // subtarget has LSE
+    return OutlineAtomics && !HasLSE;
+  }
+
   bool hasVH() const { return HasVH; }
   bool hasPAN() const { return HasPAN; }
   bool hasLOR() const { return HasLOR; }
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -1,8 +1,10 @@
 ; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefix=OUTLINE-ATOMICS
 
 @var = global i128 0
 
 define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
+; OUTLINE-ATOMICS: bl __aarch64_cas16_acq
 ; CHECK-LABEL: val_compare_and_swap:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
--- a/llvm/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s
-mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck -enable-var-scope %s +; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck -enable-var-scope %s -check-prefix=OUTLINE-ATOMICS define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { +; OUTLINE-ATOMICS: bl __aarch64_cas4_acq ; CHECK-LABEL: val_compare_and_swap: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: @@ -19,6 +21,7 @@ } define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { +; OUTLINE-ATOMICS: bl __aarch64_cas4_acq ; CHECK-LABEL: val_compare_and_swap_from_load: ; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: @@ -40,6 +43,7 @@ } define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { +; OUTLINE-ATOMICS: bl __aarch64_cas4_acq_rel ; CHECK-LABEL: val_compare_and_swap_rel: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: @@ -58,6 +62,7 @@ } define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { +; OUTLINE-ATOMICS: bl __aarch64_cas8_relax ; CHECK-LABEL: val_compare_and_swap_64: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: @@ -104,6 +109,7 @@ } define i32 @fetch_and_or(i32* %p) #0 { +; OUTLINE-ATOMICS: bl __aarch64_ldset4_acq_rel ; CHECK-LABEL: fetch_and_or: ; CHECK: mov [[OLDVAL_REG:w[0-9]+]], #5 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]: @@ -118,6 +124,7 @@ } define i64 @fetch_and_or_64(i64* %p) #0 { +; OUTLINE-ATOMICS: bl __aarch64_ldset8_relax ; CHECK: fetch_and_or_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 ; CHECK: [[TRYBB:.?LBB[0-9_]+]]: @@ -332,6 +339,7 @@ } define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 { +; OUTLINE-ATOMICS: bl __aarch64_ldadd4_acq_rel ; CHECK-LABEL: atomic_store_relaxed_64: %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8 diff 
--git a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll --- a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=arm64_32-apple-ios7.0 -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s -check-prefix=OUTLINE-ATOMICS define i8 @test_load_8(i8* %addr) { ; CHECK-LABAL: test_load_8: @@ -239,6 +240,7 @@ } define {i8*, i1} @test_cmpxchg_ptr(i8** %addr, i8* %cmp, i8* %new) { +; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel ; CHECK-LABEL: test_cmpxchg_ptr: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll --- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1,4 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS ; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefix=CHECK-REG ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=saphira < %s | FileCheck %s @@ -16,5172 +19,9698 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, 
:lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB18_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB18_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16: +; 
OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB19_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB19_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB20_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB20_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; 
CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB21_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB21_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB22_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB22_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; 
OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB23_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB23_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB24_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; 
OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB24_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB25_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB25_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32: +; 
OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB26_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB26_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB27_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB27_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void 
@test_atomic_load_umin_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB28_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB28_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB29_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB29_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset seq_cst -; 
CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB30_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB30_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB31_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 
+; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB31_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB32_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB32_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_max_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB33_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB33_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB34_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB34_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void 
@test_atomic_load_max_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB35_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB35_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB36_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB36_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 
%offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB37_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB37_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB38_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: 
cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB38_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB39_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB39_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_umax_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB40_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB40_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB41_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB41_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-LABEL: 
test_atomic_load_xchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casab w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var8 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK-NEXT: casab w0, w1, [x[[ADDR]]] -; CHECK-NEXT: ret ret i8 %old } define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_1: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: mov w9, w0 +; CHECK-NEXT: casab w9, w1, [x8] +; CHECK-NEXT: cmp w9, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mov w19, w0 +; OUTLINE-ATOMICS-NEXT: adrp x2, var8 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq +; OUTLINE-ATOMICS-NEXT: cmp w0, w19, uxtb +; OUTLINE-ATOMICS-NEXT: cset w0, eq +; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire %success = extractvalue { i8, i1 } %pair, 1 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]] -; CHECK-NEXT: cmp w[[NEW]], w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret ret i1 %success } define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casah w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var16 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK-NEXT: casah w0, w1, [x[[ADDR]]] -; CHECK-NEXT: ret ret i16 %old } define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_1: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: mov w9, w0 +; CHECK-NEXT: casah w9, w1, [x8] +; CHECK-NEXT: cmp w9, w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mov w19, w0 +; OUTLINE-ATOMICS-NEXT: adrp x2, var16 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq +; OUTLINE-ATOMICS-NEXT: cmp w0, w19, uxth +; OUTLINE-ATOMICS-NEXT: cset w0, eq +; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire %success = extractvalue { i16, i1 } %pair, 1 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]] -; CHECK-NEXT: cmp w[[NEW]], w0, uxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret ret i1 %success } define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var32 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casa x0, x1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var64 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casa x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: caspa x0, x1, 
x2, x3, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x4, var128 +; OUTLINE-ATOMICS-NEXT: add x4, x4, :lo12:var128 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldaddalb w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldaddalh w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldaddal w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldaddal x9, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %neg = sub i8 0, %offset %old = atomicrmw sub i8* @var8, i8 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %neg = sub i16 0, %offset %old = atomicrmw sub i16* @var16, i16 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %neg = sub i32 0, %offset %old = atomicrmw sub i32* @var32, i32 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %neg = sub i64 0, %offset %old = atomicrmw sub i64* @var64, i64 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i8 @test_atomic_load_and_i8_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldclralb w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldclralh w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldclral w9, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: ldclral x9, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldclralb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %inv = xor i8 %offset, -1 %old = atomicrmw and i8* @var8, i8 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldclralh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %inv = xor i16 %offset, -1 %old = atomicrmw and i16* @var16, i16 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldclral w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %inv = xor i32 %offset, -1 %old = atomicrmw and i32* @var32, i32 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldclral x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %inv = xor i64 %offset, -1 %old = atomicrmw and i64* @var64, i64 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw add i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrab w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrab w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrah w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrah w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrlb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrlb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrlh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrlh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: mvn x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw and i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casab w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var8 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casah w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var16 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casah w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var32 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casa x0, x1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var64 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casa x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x4, var128 +; OUTLINE-ATOMICS-NEXT: add x4, x4, :lo12:var128 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casb w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var8 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new monotonic monotonic %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: cash w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var16 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new monotonic monotonic %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: cash w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: cas w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var32 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new monotonic monotonic %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: cas w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: cas x0, x1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var64 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: cas x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: casp x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x4, var128 +; OUTLINE-ATOMICS-NEXT: add x4, x4, :lo12:var128 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new monotonic monotonic %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casalb w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var8 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casalh w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var16 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casalh w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var32 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst seq_cst %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casal w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casal x0, x1, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x2, var64 +; OUTLINE-ATOMICS-NEXT: add x2, x2, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst seq_cst %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casal x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x4, var128 +; OUTLINE-ATOMICS-NEXT: add x4, x4, :lo12:var128 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new seq_cst seq_cst %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB158_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB158_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; 
CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB159_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB159_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB160_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB160_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp 
[[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB161_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB161_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB162_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; 
OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB162_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB163_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB163_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; 
OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB164_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB164_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB165_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB165_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxah 
w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxa w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB166_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB166_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxa x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB167_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: 
cbnz w11, .LBB167_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxa w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB168_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB168_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxa x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 
+; OUTLINE-ATOMICS-NEXT: .LBB169_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB169_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB170_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB170_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_monotonic: +; CHECK: 
// %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB171_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB171_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmax w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB172_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB172_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = 
atomicrmw max i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmax x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB173_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB173_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmax w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB174_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This 
Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB174_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmax x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB175_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB175_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB176_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB176_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB177_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB177_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; 
CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB178_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB178_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB179_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, 
x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB179_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB180_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB180_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; 
OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB181_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB181_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB182_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB182_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 
@test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB183_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB183_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB184_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB184_1 +; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB185_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB185_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: 
add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB186_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB186_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB187_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB187_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw max i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, 
x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB188_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB188_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB189_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB189_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 
%offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB190_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB190_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB191_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: 
ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB191_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB192_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB192_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB193_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB193_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB194_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB194_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminab w[[OLD:[0-9]+]], 
w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB195_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB195_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB196_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stxr w11, 
w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB196_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB197_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB197_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; 
OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB198_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB198_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB199_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB199_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_monotonic: +; CHECK: // 
%bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB200_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB200_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB201_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB201_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; 
OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmin w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB202_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB202_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmin x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB203_1: // %atomicrmw.start +; 
OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB203_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmin w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB204_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB204_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; 
CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmin x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB205_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB205_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB206_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB206_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; 
CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB207_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB207_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB208_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: 
cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB208_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB209_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB209_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminl w0, w8, [x8] +; CHECK-NEXT: ret +; +; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB210_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB210_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB211_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB211_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 
@test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB212_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB212_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB213_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE-ATOMICS-NEXT: sxth w8, w10 +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: 
cbnz w11, .LBB213_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB214_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB214_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; 
OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB215_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB215_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB216_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB216_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: 
test_atomic_load_min_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB217_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB217_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldseta w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldseta w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldseta x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldseta x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldseta w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldseta w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldseta x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldseta x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldseth w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldseth w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldset w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldset w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldset x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldset x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldset w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldset w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldset x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldset x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw or i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddab w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddah w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddlb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddlh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg w0, w0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: neg x0, x0 +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpa x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpa x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swph w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swph w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swp w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swp x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swp w0, wzr, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swp x0, xzr, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swplb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swplb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swplh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swplh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpl w0, wzr, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpl x0, xzr, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xchg i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB308_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB308_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_umax_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB309_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB309_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB310_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB310_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], 
w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB311_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB311_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB312_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; 
OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB312_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB313_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB313_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB314_1: // 
%atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB314_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB315_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB315_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_acquire: +; CHECK: // 
%bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxa w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB316_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB316_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxa x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB317_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB317_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset acquire -; 
CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxa w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB318_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB318_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxa x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB319_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp 
x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB319_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB320_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB320_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic: +; OUTLINE-ATOMICS: 
// %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB321_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB321_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumax w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB322_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB322_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 
%old } define i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumax x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB323_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB323_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumax w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB324_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB324_1 +; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumax x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB325_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB325_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB326_1: // %atomicrmw.start +; 
OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB326_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB327_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB327_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_release: +; CHECK: // %bb.0: +; 
CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB328_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB328_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB329_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB329_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; 
CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB330_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB330_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB331_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; 
OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB331_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB332_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB332_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst: +; OUTLINE-ATOMICS: // 
%bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB333_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB333_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB334_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB334_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 
%old } define i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB335_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB335_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB336_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB336_1 +; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB337_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB337_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umax i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB338_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner 
Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB338_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB339_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB339_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: 
add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB340_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB340_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB341_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB341_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB342_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB342_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB343_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; 
OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB343_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB344_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB344_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; 
OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB345_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB345_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB346_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB346_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64_acquire(i64 
%offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB347_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB347_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB348_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB348_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret 
atomicrmw umin i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB349_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB349_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB350_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w8, [x9] +; 
OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB350_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB351_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB351_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w0, [x8] 
+; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB352_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB352_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB353_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB353_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], 
{{#?}}:lo12:var64 -; CHECK: ldumin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB354_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB354_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB355_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; OUTLINE-ATOMICS-NEXT: stxr w10, 
x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB355_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB356_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB356_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, 
:lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB357_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB357_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB358_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB358_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: 
test_atomic_load_umin_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB359_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB359_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB360_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB360_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i32* @var32, i32 
%offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB361_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB361_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var8 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: .LBB362_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, 
w0, uxtb +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB362_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var16 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: .LBB363_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB363_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; +; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var32 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB364_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp w8, w0 +; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB364_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x9, var64 +; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB365_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE-ATOMICS-NEXT: cmp x8, x0 +; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB365_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal 
x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var32 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: .LBB366_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w9, w0 +; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB366_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var64 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: .LBB367_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] +; OUTLINE-ATOMICS-NEXT: cmp x9, x0 +; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls +; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w10, 
.LBB367_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret atomicrmw umin i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorab w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorah w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeora w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeora w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeora x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeora x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeora w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeora w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeora x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeora x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeor w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeor w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeor x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeor x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeor w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeor w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeor x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeor x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorlb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorlh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeorl w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeorl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeorl x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeorl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeorl w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeorl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeorl x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeorl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var8 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var16 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var32 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; +; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst: +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE-ATOMICS-NEXT: adrp x1, var64 +; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE-ATOMICS-NEXT: ret atomicrmw xor i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll --- a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,6 +1,8 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define i32 @foo(i32* %var, i1 %cond) { +; OUTLINE-ATOMICS: bl __aarch64_ldadd4_relax ; CHECK-LABEL: foo: br i1 %cond, label %atomic_ver, label %simple_ver simple_ver: diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll --- a/llvm/test/CodeGen/AArch64/atomic-ops.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll @@ -1,6 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG - +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created ; (i.e. reusing a register for status & data in store exclusive). 
@@ -14,1136 +15,1589 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB0_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x9] +; CHECK-NEXT: add w10, w8, w0 +; CHECK-NEXT: stlxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB0_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw add i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB1_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x9] +; CHECK-NEXT: add w10, w8, w0 +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB1_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd2_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw add i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB2_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr w8, [x9] +; CHECK-NEXT: add w10, w8, w0 +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB2_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd4_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw add i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB3_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: add x10, x8, x0 +; CHECK-NEXT: stxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB3_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_add_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd8_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw add i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB4_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrb w8, [x9] +; CHECK-NEXT: sub w10, w8, w0 +; CHECK-NEXT: stxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB4_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: neg w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd1_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw sub i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB5_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w8, [x9] +; CHECK-NEXT: sub w10, w8, w0 +; CHECK-NEXT: stlxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB5_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: neg w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd2_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw sub i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: sub w10, w8, w0 +; CHECK-NEXT: stxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB6_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: neg w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd4_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw sub i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x8, [x9] +; CHECK-NEXT: sub x10, x8, x0 +; CHECK-NEXT: stlxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB7_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: neg x0, x0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB8_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrb w8, [x9] +; CHECK-NEXT: and w10, w8, w0 +; CHECK-NEXT: stlxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB8_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr1_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB9_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w8, [x9] +; CHECK-NEXT: and w10, w8, w0 +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB9_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr2_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB10_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: and w10, w8, w0 +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB10_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: mvn w0, w0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB11_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x8, [x9] +; CHECK-NEXT: and x10, x8, x0 +; CHECK-NEXT: stxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB11_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_and_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: mvn x0, x0 +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldclr8_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB12_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x9] +; CHECK-NEXT: orr w10, w8, w0 +; CHECK-NEXT: stlxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB12_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw or i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB13_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w8, [x9] +; CHECK-NEXT: orr w10, w8, w0 +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB13_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldset2_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw or i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB14_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: orr w10, w8, w0 +; CHECK-NEXT: stxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB14_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldset4_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw or i32* @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB15_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: orr x10, x8, x0 +; CHECK-NEXT: stlxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB15_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_or_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldset8_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw or i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB16_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x9] +; CHECK-NEXT: eor w10, w8, w0 +; CHECK-NEXT: stxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB16_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldeor1_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xor i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB17_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w8, [x9] +; CHECK-NEXT: eor w10, w8, w0 +; CHECK-NEXT: stlxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB17_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldeor2_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xor i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB18_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: eor w10, w8, w0 +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB18_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB19_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: eor x10, x8, x0 +; CHECK-NEXT: stxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB19_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xor_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldeor8_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xor i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB20_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrb w8, [x9] +; CHECK-NEXT: stxrb w10, w0, [x9] +; CHECK-NEXT: cbnz w10, .LBB20_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var8 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_swp1_relax +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xchg i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB21_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x9] +; CHECK-NEXT: stlxrh w10, w0, [x9] +; CHECK-NEXT: cbnz w10, .LBB21_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var16 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_swp2_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32: -; CHECK: mov {{[xw]}}8, w[[OLD:[0-9]+]] - %old = atomicrmw xchg i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldxr {{[xw]}}[[OLD]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w8, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - ret i32 %old +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB22_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr w0, [x9] +; CHECK-NEXT: stlxr w10, w8, [x9] +; CHECK-NEXT: cbnz w10, .LBB22_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var32 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_swp4_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret + %old = atomicrmw xchg i32* @var32, i32 %offset release ret i32 %old } define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB23_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x8, [x9] +; CHECK-NEXT: stxr w10, x0, [x9] +; CHECK-NEXT: cbnz w10, .LBB23_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x1, var64 +; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_swp8_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw xchg i64* @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } - define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB24_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w10, [x9] +; CHECK-NEXT: sxtb w8, w10 +; CHECK-NEXT: cmp w8, w0, sxtb +; CHECK-NEXT: csel w10, w10, w0, le +; CHECK-NEXT: stxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB24_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var8 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: .LBB24_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE_ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE_ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE_ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB24_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw min i8* @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le - -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]] ret i8 %old } define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB25_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w10, [x9] +; CHECK-NEXT: sxth w8, w10 +; CHECK-NEXT: cmp w8, w0, sxth +; CHECK-NEXT: csel w10, w10, w0, le +; CHECK-NEXT: stlxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB25_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var16 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: .LBB25_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxrh w10, [x9] +; OUTLINE_ATOMICS-NEXT: sxth w8, w10 +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE_ATOMICS-NEXT: csel w10, w10, w0, le +; OUTLINE_ATOMICS-NEXT: stlxrh w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB25_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw min i16* @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le - - -; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]] ret i16 %old } define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB26_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr w8, [x9] +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: csel w10, w8, w0, le +; CHECK-NEXT: stxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB26_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var32 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: .LBB26_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0 +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, le +; OUTLINE_ATOMICS-NEXT: stxr w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB26_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw min i32* @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le - - -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB27_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x8, [x9] +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: csel x10, x8, x0, le +; CHECK-NEXT: stlxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB27_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var64 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: .LBB27_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp x8, x0 +; OUTLINE_ATOMICS-NEXT: csel x10, x8, x0, le +; OUTLINE_ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB27_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov x0, x8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp x[[OLD]], x0 -; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB28_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w10, [x9] +; CHECK-NEXT: sxtb w8, w10 +; CHECK-NEXT: cmp w8, w0, sxtb +; CHECK-NEXT: csel w10, w10, w0, gt +; CHECK-NEXT: stlxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB28_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var8 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: .LBB28_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxrb w10, [x9] +; OUTLINE_ATOMICS-NEXT: sxtb w8, w10 +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, sxtb +; OUTLINE_ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE_ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB28_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw max i8* @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt - - -; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]] ret i8 %old } define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB29_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w10, [x9] +; CHECK-NEXT: sxth w8, w10 +; CHECK-NEXT: cmp w8, w0, sxth +; CHECK-NEXT: csel w10, w10, w0, gt +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB29_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var16 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: .LBB29_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxrh w10, [x9] +; OUTLINE_ATOMICS-NEXT: sxth w8, w10 +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, sxth +; OUTLINE_ATOMICS-NEXT: csel w10, w10, w0, gt +; OUTLINE_ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB29_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw max i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt - - -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]] ret i16 %old } define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB30_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr w8, [x9] +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: csel w10, w8, w0, gt +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB30_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var32 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: .LBB30_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxr w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0 +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, gt +; OUTLINE_ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB30_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw max i32* @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB31_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: csel x10, x8, x0, gt +; CHECK-NEXT: stxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB31_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var64 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: .LBB31_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp x8, x0 +; OUTLINE_ATOMICS-NEXT: csel x10, x8, x0, gt +; OUTLINE_ATOMICS-NEXT: stxr w11, x10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB31_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov x0, x8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw max i64* @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp x[[OLD]], x0 -; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt - - -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB32_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrb w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxtb +; CHECK-NEXT: csel w10, w8, w0, ls +; CHECK-NEXT: stxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB32_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var8 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: .LBB32_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxrb w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE_ATOMICS-NEXT: stxrb w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB32_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umin i8* @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls - - -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB33_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: csel w10, w8, w0, ls +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB33_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var16 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: .LBB33_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxrh w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE_ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB33_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umin i16* @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls - - -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB34_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: csel w10, w8, w0, ls +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB34_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var32 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: .LBB34_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0 +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, ls +; OUTLINE_ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB34_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB35_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x8, [x9] +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: csel x10, x8, x0, ls +; CHECK-NEXT: stlxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB35_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var64 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: .LBB35_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxr x8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp x8, x0 +; OUTLINE_ATOMICS-NEXT: csel x10, x8, x0, ls +; OUTLINE_ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB35_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov x0, x8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umin i64* @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp x[[OLD]], x0 -; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB36_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxtb +; CHECK-NEXT: csel w10, w8, w0, hi +; CHECK-NEXT: stlxrb w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB36_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var8 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: .LBB36_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxrb w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, uxtb +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE_ATOMICS-NEXT: stlxrb w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB36_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umax i8* @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi - - -; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB37_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxrh w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: csel w10, w8, w0, hi +; CHECK-NEXT: stxrh w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB37_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var16 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: .LBB37_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxrh w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0, uxth +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE_ATOMICS-NEXT: stxrh w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB37_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umax i16* @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi - - -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB38_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x9] +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: csel w10, w8, w0, hi +; CHECK-NEXT: stlxr w11, w10, [x9] +; CHECK-NEXT: cbnz w11, .LBB38_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var32 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: .LBB38_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldaxr w8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp w8, w0 +; OUTLINE_ATOMICS-NEXT: csel w10, w8, w0, hi +; OUTLINE_ATOMICS-NEXT: stlxr w11, w10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB38_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov w0, w8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB39_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: csel x10, x8, x0, hi +; CHECK-NEXT: stlxr w11, x10, [x9] +; CHECK-NEXT: cbnz w11, .LBB39_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x9, var64 +; OUTLINE_ATOMICS-NEXT: add x9, x9, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: .LBB39_1: // %atomicrmw.start +; OUTLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE_ATOMICS-NEXT: ldxr x8, [x9] +; OUTLINE_ATOMICS-NEXT: cmp x8, x0 +; OUTLINE_ATOMICS-NEXT: csel x10, x8, x0, hi +; OUTLINE_ATOMICS-NEXT: stlxr w11, x10, [x9] +; OUTLINE_ATOMICS-NEXT: cbnz w11, .LBB39_1 +; OUTLINE_ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE_ATOMICS-NEXT: mov x0, x8 +; OUTLINE_ATOMICS-NEXT: ret %old = atomicrmw umax i64* @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; x0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
- -; CHECK-NEXT: cmp x[[OLD]], x0 -; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi - - -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb - -; CHECK: mov x0, x[[OLD]] ret i64 %old } define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: .LBB40_1: // %cmpxchg.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxtb +; CHECK-NEXT: b.ne .LBB40_4 +; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NEXT: // in Loop: Header=BB40_1 Depth=1 +; CHECK-NEXT: stxrb w10, w1, [x9] +; CHECK-NEXT: cbnz w10, .LBB40_1 +; CHECK-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB40_4: // %cmpxchg.nostore +; CHECK-NEXT: clrex +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x2, var8 +; OUTLINE_ATOMICS-NEXT: add x2, x2, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_cas1_acq +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 - -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] -; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: [[GET_OUT]]: -; CHECK: clrex -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: .LBB41_1: // %cmpxchg.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x9] +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: b.ne .LBB41_4 +; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NEXT: // in Loop: Header=BB41_1 Depth=1 +; CHECK-NEXT: stlxrh w10, w1, [x9] +; CHECK-NEXT: cbnz w10, .LBB41_1 +; CHECK-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB41_4: // %cmpxchg.nostore +; CHECK-NEXT: clrex +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x2, var16 +; OUTLINE_ATOMICS-NEXT: add x2, x2, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_cas2_acq_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst %old = extractvalue { i16, i1 } %pair, 0 - -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. 
-; CHECK-NEXT: cmp w[[OLD]], w0 -; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: [[GET_OUT]]: -; CHECK: clrex -; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: .LBB42_1: // %cmpxchg.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr w0, [x9] +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: b.ne .LBB42_4 +; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NEXT: // in Loop: Header=BB42_1 Depth=1 +; CHECK-NEXT: stlxr w10, w1, [x9] +; CHECK-NEXT: cbnz w10, .LBB42_1 +; CHECK-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB42_4: // %cmpxchg.nostore +; CHECK-NEXT: clrex +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x2, var32 +; OUTLINE_ATOMICS-NEXT: add x2, x2, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_cas4_rel +; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic %old = extractvalue { i32, i1 } %pair, 0 - -; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0 - -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 - -; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]] -; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: [[GET_OUT]]: -; CHECK: clrex -; CHECK-NOT: dmb ret i32 %old } define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: .LBB43_1: // %cmpxchg.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldxr x8, [x9] +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: b.ne .LBB43_3 +; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NEXT: // in Loop: Header=BB43_1 Depth=1 +; CHECK-NEXT: stxr w10, x1, [x9] +; CHECK-NEXT: cbnz w10, .LBB43_1 +; CHECK-NEXT: b .LBB43_4 +; CHECK-NEXT: .LBB43_3: // %cmpxchg.nostore +; CHECK-NEXT: clrex +; CHECK-NEXT: .LBB43_4: // %cmpxchg.end +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: str x8, [x9, :lo12:var64] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; OUTLINE_ATOMICS-NEXT: adrp x19, var64 +; OUTLINE_ATOMICS-NEXT: add x19, x19, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: mov x2, x19 +; OUTLINE_ATOMICS-NEXT: bl __aarch64_cas8_relax +; OUTLINE_ATOMICS-NEXT: str x0, [x19] +; OUTLINE_ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; OUTLINE_ATOMICS-NEXT: ret %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 - -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 - -; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. -; CHECK-NEXT: cmp x[[OLD]], x0 -; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. -; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: [[GET_OUT]]: -; CHECK: clrex -; CHECK-NOT: dmb - -; CHECK: str x[[OLD]], store i64 %old, i64* @var64 ret void } define i8 @test_atomic_load_monotonic_i8() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: ldrb w0, [x8, :lo12:var8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: ldrb w0, [x8, :lo12:var8] +; OUTLINE_ATOMICS-NEXT: ret %val = load atomic i8, i8* @var8 monotonic, align 1 -; CHECK-NOT: dmb -; CHECK: adrp x[[HIADDR:[0-9]+]], var8 -; CHECK: ldrb w0, [x[[HIADDR]], {{#?}}:lo12:var8] -; CHECK-NOT: dmb - ret i8 %val } define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrb w0, [x0, x1] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_regoff_i8: 
+; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: ldrb w0, [x0, x1] +; OUTLINE_ATOMICS-NEXT: ret %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i8* - %val = load atomic i8, i8* %addr monotonic, align 1 -; CHECK-NOT: dmb -; CHECK: ldrb w0, [x0, x1] -; CHECK-NOT: dmb - ret i8 %val } define i8 @test_atomic_load_acquire_i8() nounwind { ; CHECK-LABEL: test_atomic_load_acquire_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldarb w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_acquire_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: ldarb w0, [x8] +; OUTLINE_ATOMICS-NEXT: ret %val = load atomic i8, i8* @var8 acquire, align 1 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK-NOT: dmb -; CHECK: ldarb w0, [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %val } define i8 @test_atomic_load_seq_cst_i8() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldarb w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_seq_cst_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: ldarb w0, [x8] +; OUTLINE_ATOMICS-NEXT: ret %val = load atomic i8, i8* @var8 seq_cst, align 1 -; CHECK-NOT: dmb -; CHECK: adrp [[HIADDR:x[0-9]+]], var8 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8 -; CHECK-NOT: dmb -; CHECK: ldarb w0, [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %val } define i16 @test_atomic_load_monotonic_i16() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: ldrh w0, [x8, :lo12:var16] +; 
CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var16 +; OUTLINE_ATOMICS-NEXT: ldrh w0, [x8, :lo12:var16] +; OUTLINE_ATOMICS-NEXT: ret %val = load atomic i16, i16* @var16 monotonic, align 2 -; CHECK-NOT: dmb -; CHECK: adrp x[[HIADDR:[0-9]+]], var16 -; CHECK-NOT: dmb -; CHECK: ldrh w0, [x[[HIADDR]], {{#?}}:lo12:var16] -; CHECK-NOT: dmb - ret i16 %val } define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w0, [x0, x1] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_monotonic_regoff_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: ldr w0, [x0, x1] +; OUTLINE_ATOMICS-NEXT: ret %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i32* - %val = load atomic i32, i32* %addr monotonic, align 4 -; CHECK-NOT: dmb -; CHECK: ldr w0, [x0, x1] -; CHECK-NOT: dmb - ret i32 %val } define i64 @test_atomic_load_seq_cst_i64() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldar x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_load_seq_cst_i64: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var64 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: ldar x0, [x8] +; OUTLINE_ATOMICS-NEXT: ret %val = load atomic i64, i64* @var64 seq_cst, align 8 -; CHECK-NOT: dmb -; CHECK: adrp [[HIADDR:x[0-9]+]], var64 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var64 -; CHECK-NOT: dmb -; CHECK: ldar x0, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %val } define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: strb w0, [x8, :lo12:var8] +; 
CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: strb w0, [x8, :lo12:var8] +; OUTLINE_ATOMICS-NEXT: ret store atomic i8 %val, i8* @var8 monotonic, align 1 -; CHECK: adrp x[[HIADDR:[0-9]+]], var8 -; CHECK: strb w0, [x[[HIADDR]], {{#?}}:lo12:var8] - ret void } define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: strb w2, [x0, x1] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_regoff_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: strb w2, [x0, x1] +; OUTLINE_ATOMICS-NEXT: ret %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i8* - store atomic i8 %val, i8* %addr monotonic, align 1 -; CHECK: strb w2, [x0, x1] - ret void } define void @test_atomic_store_release_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_release_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: stlrb w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_release_i8: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: stlrb w0, [x8] +; OUTLINE_ATOMICS-NEXT: ret store atomic i8 %val, i8* @var8 release, align 1 -; CHECK-NOT: dmb -; CHECK: adrp [[HIADDR:x[0-9]+]], var8 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8 -; CHECK-NOT: dmb -; CHECK: stlrb w0, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_seq_cst_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: stlrb w0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_seq_cst_i8: +; 
OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var8 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: stlrb w0, [x8] +; OUTLINE_ATOMICS-NEXT: ret store atomic i8 %val, i8* @var8 seq_cst, align 1 -; CHECK-NOT: dmb -; CHECK: adrp [[HIADDR:x[0-9]+]], var8 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8 -; CHECK-NOT: dmb -; CHECK: stlrb w0, [x[[ADDR]]] -; CHECK-NOT: dmb - ret void } define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: strh w0, [x8, :lo12:var16] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_i16: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var16 +; OUTLINE_ATOMICS-NEXT: strh w0, [x8, :lo12:var16] +; OUTLINE_ATOMICS-NEXT: ret store atomic i16 %val, i16* @var16 monotonic, align 2 -; CHECK-NOT: dmb -; CHECK: adrp x[[HIADDR:[0-9]+]], var16 -; CHECK-NOT: dmb -; CHECK: strh w0, [x[[HIADDR]], {{#?}}:lo12:var16] -; CHECK-NOT: dmb ret void } define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str w2, [x0, x1] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_monotonic_regoff_i32: +; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: str w2, [x0, x1] +; OUTLINE_ATOMICS-NEXT: ret %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i32* - store atomic i32 %val, i32* %addr monotonic, align 4 -; CHECK-NOT: dmb -; CHECK: str w2, [x0, x1] -; CHECK-NOT: dmb - ret void } define void @test_atomic_store_release_i64(i64 %val) nounwind { ; CHECK-LABEL: test_atomic_store_release_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: stlr x0, [x8] +; CHECK-NEXT: ret +; +; OUTLINE_ATOMICS-LABEL: test_atomic_store_release_i64: 
+; OUTLINE_ATOMICS: // %bb.0: +; OUTLINE_ATOMICS-NEXT: adrp x8, var64 +; OUTLINE_ATOMICS-NEXT: add x8, x8, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: stlr x0, [x8] +; OUTLINE_ATOMICS-NEXT: ret store atomic i64 %val, i64* @var64 release, align 8 -; CHECK-NOT: dmb -; CHECK: adrp [[HIADDR:x[0-9]+]], var64 -; CHECK-NOT: dmb -; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var64 -; CHECK-NOT: dmb -; CHECK: stlr x0, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll --- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -1,6 +1,8 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=+outline-atomics %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { +; OUTLINE-ATOMICS: bl __aarch64_cas1_acq_rel ; CHECK-LABEL: test_cmpxchg_8: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: @@ -17,6 +19,7 @@ } define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { +; OUTLINE-ATOMICS: bl __aarch64_cas2_acq_rel ; CHECK-LABEL: test_cmpxchg_16: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: @@ -33,6 +36,7 @@ } define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { +; OUTLINE-ATOMICS: bl __aarch64_cas4_acq_rel ; CHECK-LABEL: test_cmpxchg_32: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: @@ -49,6 +53,7 @@ } define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { +; OUTLINE-ATOMICS: bl __aarch64_cas8_acq_rel ; CHECK-LABEL: test_cmpxchg_64: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: @@ -65,6 +70,7 @@ } 
define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind { +; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel ; CHECK-LABEL: test_cmpxchg_128: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: @@ -86,6 +92,7 @@ ; was false. @var128 = global i128 0 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) { +; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel ; CHECK-LABEL: test_cmpxchg_128_unsplit: ; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll --- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -1,6 +1,8 @@ ; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { +; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel ; CHECK-LABEL: test_return: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: @@ -27,6 +29,7 @@ } define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { +; OUTLINE-ATOMICS: bl ___aarch64_cas1_acq_rel ; CHECK-LABEL: test_return_bool: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: @@ -55,6 +58,7 @@ } define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) { +; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel ; CHECK-LABEL: test_conditional: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: @@ -92,6 +96,7 @@ declare void @baz() define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) { +; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel ; CHECK-LABEL: test_conditional2: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: ldaxr [[LOADED:w[0-9]+]], [x19] diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll --- a/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll +++ 
b/llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s +; RUN: llc -mtriple arm64-apple-ios -mattr=+lse -mattr=+outline-atomics %s -o - | FileCheck %s ; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM ; doesn't allocate odd ones and that it can copy them around properly. N.b. we diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll --- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define void @atomic_swap_f16(half* %ptr, half %val) nounwind { ; CHECK-LABEL: @atomic_swap_f16( @@ -15,6 +16,10 @@ ; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: ret void +; +; OUTLINE-ATOMICS-LABEL: @atomic_swap_f16( +; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg half* [[PTR:%.*]], half [[VAL:%.*]] acquire +; OUTLINE-ATOMICS-NEXT: ret void ; %t1 = atomicrmw xchg half* %ptr, half %val acquire ret void @@ -34,6 +39,10 @@ ; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: ret void +; +; OUTLINE-ATOMICS-LABEL: @atomic_swap_f32( +; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg float* [[PTR:%.*]], float [[VAL:%.*]] acquire +; OUTLINE-ATOMICS-NEXT: ret void ; %t1 = atomicrmw xchg float* %ptr, float %val acquire ret void @@ -51,6 +60,10 @@ ; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] ; CHECK: atomicrmw.end: ; 
CHECK-NEXT: ret void +; +; OUTLINE-ATOMICS-LABEL: @atomic_swap_f64( +; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg double* [[PTR:%.*]], double [[VAL:%.*]] acquire +; OUTLINE-ATOMICS-NEXT: ret void ; %t1 = atomicrmw xchg double* %ptr, double %val acquire ret void