diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -475,8 +475,10 @@
     STZ2G,
 
     LDP,
+    LDIAPP,
     LDNP,
     STP,
+    STILP,
     STNP,
 
     // Memory Operations
@@ -705,6 +707,7 @@
   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
 
   bool isOpSuitableForLDPSTP(const Instruction *I) const;
+  bool isOpSuitableForRCPC3(const Instruction *I) const;
   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
   bool
   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -834,7 +834,7 @@
     setOperationAction(ISD::STORE, MVT::i128, Custom);
 
     // Aligned 128-bit loads and stores are single-copy atomic according to the
-    // v8.4a spec.
+    // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
     if (Subtarget->hasLSE2()) {
       setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
       setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -2572,8 +2572,10 @@
     MAKE_CASE(AArch64ISD::SSTNT1_PRED)
     MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
     MAKE_CASE(AArch64ISD::LDP)
+    MAKE_CASE(AArch64ISD::LDIAPP)
     MAKE_CASE(AArch64ISD::LDNP)
     MAKE_CASE(AArch64ISD::STP)
+    MAKE_CASE(AArch64ISD::STILP)
     MAKE_CASE(AArch64ISD::STNP)
     MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
@@ -5698,9 +5700,14 @@
   MemSDNode *StoreNode = cast<MemSDNode>(Op);
   assert(StoreNode->getMemoryVT() == MVT::i128);
   assert(StoreNode->isVolatile() || StoreNode->isAtomic());
-  assert(!StoreNode->isAtomic() ||
-         StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
-         StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
+
+  bool IsStoreRelease =
+      StoreNode->getMergedOrdering() == AtomicOrdering::Release;
+  if (StoreNode->isAtomic())
+    assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
+            Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
+           StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
+           StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
 
   SDValue Value = StoreNode->getOpcode() == ISD::STORE
                       ? StoreNode->getOperand(1)
@@ -5710,8 +5717,10 @@
                            DAG.getConstant(0, DL, MVT::i64));
   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
                            DAG.getConstant(1, DL, MVT::i64));
+
+  unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
   SDValue Result = DAG.getMemIntrinsicNode(
-      AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
+      Opcode, DL, DAG.getVTList(MVT::Other),
       {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
       StoreNode->getMemoryVT(), StoreNode->getMemOperand());
   return Result;
@@ -5984,7 +5993,7 @@
     return LowerINTRINSIC_VOID(Op, DAG);
   case ISD::ATOMIC_STORE:
     if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
-      assert(Subtarget->hasLSE2());
+      assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
       return LowerStore128(Op, DAG);
     }
     return SDValue();
@@ -22325,9 +22334,16 @@
   }
 
   if (SDValue(N, 0).getValueType() == MVT::i128) {
+    auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
+    bool isLoadAcquire =
+        AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
+    unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
+
+    if (isLoadAcquire)
+      assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
+
     SDValue Result = DAG.getMemIntrinsicNode(
-        AArch64ISD::LDP, SDLoc(N),
-        DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+        Opcode, SDLoc(N), DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
         {LoadNode->getChain(), LoadNode->getBasePtr()},
         LoadNode->getMemoryVT(), LoadNode->getMemOperand());
@@ -22450,8 +22466,27 @@
   return false;
 }
 
+bool AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const {
+  if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
+    return false;
+
+  if (auto LI = dyn_cast<LoadInst>(I))
+    return LI->getType()->getPrimitiveSizeInBits() == 128 &&
+           LI->getAlign() >= Align(16) &&
+           LI->getOrdering() == AtomicOrdering::Acquire;
+
+  if (auto SI = dyn_cast<StoreInst>(I))
+    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+           SI->getAlign() >= Align(16) &&
+           SI->getOrdering() == AtomicOrdering::Release;
+
+  return false;
+}
+
 bool AArch64TargetLowering::shouldInsertFencesForAtomic(
     const Instruction *I) const {
+  if (isOpSuitableForRCPC3(I))
+    return false;
   return isOpSuitableForLDPSTP(I);
 }
@@ -22485,7 +22520,7 @@
 TargetLoweringBase::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
-  if (Size != 128 || isOpSuitableForLDPSTP(SI))
+  if (Size != 128 || isOpSuitableForLDPSTP(SI) || isOpSuitableForRCPC3(SI))
     return AtomicExpansionKind::None;
   return AtomicExpansionKind::Expand;
 }
@@ -22497,7 +22532,7 @@
 AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
 
-  if (Size != 128 || isOpSuitableForLDPSTP(LI))
+  if (Size != 128 || isOpSuitableForLDPSTP(LI) || isOpSuitableForRCPC3(LI))
     return AtomicExpansionKind::None;
 
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -362,8 +362,10 @@
 def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
 
 def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 
 // Generates the general dynamic sequences, i.e.
@@ -786,8 +788,10 @@
 def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
 
 def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
@@ -3408,7 +3412,7 @@
 def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
 def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
 
-// Pair (pre-indexed)
+// Pair (post-indexed)
 def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
 def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
 def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
@@ -8684,6 +8688,9 @@
   def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
   def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
 
+  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
+  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
+
   // Aliases for when offset=0
   def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
   def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -79,6 +79,7 @@
   const LLT &MinFPScalar = HasFP16 ? s16 : s32;
 
   const bool HasCSSC = ST.hasCSSC();
+  const bool HasRCPC3 = ST.hasRCPC3();
 
   getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
       .legalFor({p0, s8, s16, s32, s64})
@@ -309,6 +310,10 @@
   };
 
   getActionDefinitionsBuilder(G_LOAD)
+      .customIf([=](const LegalityQuery &Query) {
+        return HasRCPC3 && Query.Types[0] == s128 &&
+               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
+      })
       .customIf([=](const LegalityQuery &Query) {
         return Query.Types[0] == s128 &&
                Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
       })
@@ -328,16 +333,17 @@
                                  {v2s64, p0, s128, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
-      .widenScalarToNextPow2(0, /* MinSize = */8)
+      .widenScalarToNextPow2(0, /* MinSize = */ 8)
       .lowerIfMemSizeNotByteSizePow2()
       .clampScalar(0, s8, s64)
-      .narrowScalarIf([=](const LegalityQuery &Query) {
-        // Clamp extending load results to 32-bits.
-        return Query.Types[0].isScalar() &&
-               Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
-               Query.Types[0].getSizeInBits() > 32;
-      },
-      changeTo(0, s32))
+      .narrowScalarIf(
+          [=](const LegalityQuery &Query) {
+            // Clamp extending load results to 32-bits.
+            return Query.Types[0].isScalar() &&
+                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
+                   Query.Types[0].getSizeInBits() > 32;
+          },
+          changeTo(0, s32))
       .clampMaxNumElements(0, s8, 16)
       .clampMaxNumElements(0, s16, 8)
       .clampMaxNumElements(0, s32, 4)
@@ -347,31 +353,25 @@
       .scalarizeIf(typeIs(0, v2s16), 0);
 
   getActionDefinitionsBuilder(G_STORE)
+      .customIf([=](const LegalityQuery &Query) {
+        return HasRCPC3 && Query.Types[0] == s128 &&
+               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
+      })
       .customIf([=](const LegalityQuery &Query) {
         return Query.Types[0] == s128 &&
                Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
       })
-      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
-                                 {s16, p0, s8, 8}, // truncstorei8 from s16
-                                 {s32, p0, s8, 8}, // truncstorei8 from s32
-                                 {s64, p0, s8, 8}, // truncstorei8 from s64
-                                 {s16, p0, s16, 8},
-                                 {s32, p0, s16, 8}, // truncstorei16 from s32
-                                 {s64, p0, s16, 8}, // truncstorei16 from s64
-                                 {s32, p0, s8, 8},
-                                 {s32, p0, s16, 8},
-                                 {s32, p0, s32, 8},
-                                 {s64, p0, s64, 8},
-                                 {s64, p0, s32, 8}, // truncstorei32 from s64
-                                 {p0, p0, s64, 8},
-                                 {s128, p0, s128, 8},
-                                 {v16s8, p0, s128, 8},
-                                 {v8s8, p0, s64, 8},
-                                 {v4s16, p0, s64, 8},
-                                 {v8s16, p0, s128, 8},
-                                 {v2s32, p0, s64, 8},
-                                 {v4s32, p0, s128, 8},
-                                 {v2s64, p0, s128, 8}})
+      .legalForTypesWithMemDesc(
+          {{s8, p0, s8, 8},    {s16, p0, s8, 8},   // truncstorei8 from s16
+           {s32, p0, s8, 8},                       // truncstorei8 from s32
+           {s64, p0, s8, 8},                       // truncstorei8 from s64
+           {s16, p0, s16, 8},  {s32, p0, s16, 8},  // truncstorei16 from s32
+           {s64, p0, s16, 8},                      // truncstorei16 from s64
+           {s32, p0, s8, 8},   {s32, p0, s16, 8},  {s32, p0, s32, 8},
+           {s64, p0, s64, 8},  {s64, p0, s32, 8},  // truncstorei32 from s64
+           {p0, p0, s64, 8},   {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
+           {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
+           {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -1188,27 +1188,49 @@
   const LLT ValTy = MRI.getType(ValReg);
 
   if (ValTy == LLT::scalar(128)) {
-    assert((*MI.memoperands_begin())->getSuccessOrdering() ==
-               AtomicOrdering::Monotonic ||
-           (*MI.memoperands_begin())->getSuccessOrdering() ==
-               AtomicOrdering::Unordered);
-    assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+    AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
+    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+    bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
+    bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
+    bool IsRcpC3 =
+        ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
+
     LLT s64 = LLT::scalar(64);
+
+    unsigned Opcode;
+    if (IsRcpC3) {
+      Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
+    } else {
+      // For LSE2, loads/stores should have been converted to monotonic and had
+      // a fence inserted after them.
+      assert(Ordering == AtomicOrdering::Monotonic ||
+             Ordering == AtomicOrdering::Unordered);
+      assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+      Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
+    }
+
     MachineInstrBuilder NewI;
-    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
-      NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
+    if (IsLoad) {
+      NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
       MIRBuilder.buildMergeLikeInstr(
           ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
     } else {
       auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
       NewI = MIRBuilder.buildInstr(
-          AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
+          Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
+    }
+
+    if (IsRcpC3) {
+      NewI.addUse(MI.getOperand(1).getReg());
+    } else {
+      Register Base;
+      int Offset;
+      matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
+      NewI.addUse(Base);
+      NewI.addImm(Offset / 8);
     }
-    Register Base;
-    int Offset;
-    matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
-    NewI.addUse(Base);
-    NewI.addImm(Offset / 8);
 
     NewI.cloneMemRefs(MI);
     constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i8_aligned_unordered:
@@ -228,121 +228,59 @@
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0:    ldxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered:
-; -O1:    ldxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered:
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0:    ldxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O1:    ldxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0:    ldxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic:
-; -O1:    ldxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0:    ldxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O1:    ldxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0:    ldaxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire:
-; -O1:    ldaxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire:
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0:    ldaxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O1:    ldaxp x0, x1, [x8]
-; -O1:    stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0:    ldaxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stlxp w8, x10, x10, [x9]
-; -O0:    stlxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O1:    ldaxp x0, x1, [x8]
-; -O1:    stlxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
+; CHECK:    ldp x0, x1, [x0]
+; CHECK:    dmb ish
    %r = load atomic i128, ptr %ptr seq_cst, align 16
    ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0:    ldaxp x0, x1, [x9]
-; -O0:    cmp x0, x10
-; -O0:    cmp x1, x10
-; -O0:    stlxp w8, x10, x10, [x9]
-; -O0:    stlxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O1:    ldaxp x0, x1, [x8]
-; -O1:    stlxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
+; CHECK:    ldp x0, x1, [x0]
+; CHECK:    dmb ish
    %r = load atomic i128, ptr %ptr seq_cst, align 16
    ret i128 %r
 }
@@ -626,3 +564,6 @@
    %r = load atomic i128, ptr %ptr seq_cst, align 1
    ret i128 %r
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_aligned_unordered:
@@ -116,77 +116,31 @@
 }
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0:    ldxp x10, x9, [x11]
-; -O0:    cmp x10, x12
-; -O0:    cmp x9, x13
-; -O0:    stxp w8, x14, x15, [x11]
-; -O0:    stxp w8, x10, x9, [x11]
-; -O0:    eor x8, x10, x8
-; -O0:    eor x11, x9, x11
-; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0:    ldxp x10, x9, [x11]
-; -O0:    cmp x10, x12
-; -O0:    cmp x9, x13
-; -O0:    stxp w8, x14, x15, [x11]
-; -O0:    stxp w8, x10, x9, [x11]
-; -O0:    eor x8, x10, x8
-; -O0:    eor x11, x9, x11
-; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0:    ldxp x10, x9, [x11]
-; -O0:    cmp x10, x12
-; -O0:    cmp x9, x13
-; -O0:    stlxp w8, x14, x15, [x11]
-; -O0:    stlxp w8, x10, x9, [x11]
-; -O0:    eor x8, x10, x8
-; -O0:    eor x11, x9, x11
-; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stlxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK:    stilp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0:    ldaxp x10, x9, [x11]
-; -O0:    cmp x10, x12
-; -O0:    cmp x9, x13
-; -O0:    stlxp w8, x14, x15, [x11]
-; -O0:    stlxp w8, x10, x9, [x11]
-; -O0:    eor x8, x10, x8
-; -O0:    eor x11, x9, x11
-; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1:    ldaxp xzr, x8, [x2]
-; -O1:    stlxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK:    dmb ish
+; CHECK:    stp x0, x1, [x2]
+; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void
 }
@@ -330,3 +284,6 @@
     store atomic i128 %value, ptr %ptr seq_cst, align 1
     ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
@@ -10,8 +10,8 @@
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+outline-atomics -O0 | FileCheck %s --check-prefixes=CHECK,-O0
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+outline-atomics -O1 | FileCheck %s --check-prefixes=CHECK,-O1
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse128 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse128 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i8_aligned_unordered:
@@ -228,121 +228,59 @@
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0:    ldxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered:
-; -O1:    ldxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered:
+; CHECK:    ldp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0:    ldxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O1:    ldxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
+; CHECK:    ldp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0:    ldxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic:
-; -O1:    ldxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
+; CHECK:    ldp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0:    ldxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O1:    ldxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
+; CHECK:    ldp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0:    ldaxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire:
-; -O1:    ldaxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire:
+; CHECK:    ldiapp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0:    ldaxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stxp w8, x10, x10, [x9]
-; -O0:    stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O1:    ldaxp x1, x0, [x8]
-; -O1:    stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
+; CHECK:    ldiapp x1, x0, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0:    ldaxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stlxp w8, x10, x10, [x9]
-; -O0:    stlxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O1:    ldaxp x1, x0, [x8]
-; -O1:    stlxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
+; CHECK:    ldp x1, x0, [x0]
+; CHECK:    dmb ish
    %r = load atomic i128, ptr %ptr seq_cst, align 16
    ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0:    ldaxp x1, x0, [x9]
-; -O0:    cmp x1, x10
-; -O0:    cmp x0, x10
-; -O0:    stlxp w8, x10, x10, [x9]
-; -O0:    stlxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O1:    ldaxp x1, x0, [x8]
-; -O1:    stlxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
+; CHECK:    ldp x1, x0, [x0]
+; CHECK:    dmb ish
    %r = load atomic i128, ptr %ptr seq_cst, align 16
    ret i128 %r
 }
@@ -626,3 +564,6 @@
    %r = load atomic i128, ptr %ptr seq_cst, align 1
    ret i128 %r
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i8_aligned_unordered:
@@ -116,69 +116,31 @@
 }
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK:    stp x1, x0, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK:    stp x1, x0, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1:    ldxp xzr, x8, [x2]
-; -O1:    stlxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK:    stilp x1, x0, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1:    ldaxp xzr, x8, [x2]
-; -O1:    stlxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK:    dmb ish
+; CHECK:    stp x1, x0, [x2]
+; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void
 }
@@ -322,3 +284,6 @@
     store atomic i128 %value, ptr %ptr seq_cst, align 1
     ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
 ; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
@@ -10,8 +10,8 @@
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+outline-atomics -O0 | FileCheck %s --check-prefixes=CHECK,-O0
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+outline-atomics -O1 | FileCheck %s --check-prefixes=CHECK,-O1
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse128 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
 ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse128 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
diff --git a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
--- a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+++ b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
@@ -105,21 +105,24 @@
 
 
 class Feature(enum.Flag):
+    # Feature names in filenames are determined by the spelling here:
     v8a = enum.auto()
     v8_1a = enum.auto()  # -mattr=+v8.1a, mandatory FEAT_LOR, FEAT_LSE
     rcpc = enum.auto()  # FEAT_LRCPC
     lse2 = enum.auto()  # FEAT_LSE2
     outline_atomics = enum.auto()  # -moutline-atomics
-    rcpc3 = enum.auto()  # FEAT_LRCPC3
+    rcpc3 = enum.auto()  # FEAT_LSE2 + FEAT_LRCPC3
     lse128 = enum.auto()  # FEAT_LSE128
 
     @property
     def mattr(self):
         if self == Feature.outline_atomics:
-            return 'outline-atomics'
+            return '+outline-atomics'
         if self == Feature.v8_1a:
-            return 'v8.1a'
-        return self.name
+            return '+v8.1a'
+        if self == Feature.rcpc3:
+            return '+lse2,+rcpc3'
+        return '+' + self.name
 
 
 ATOMICRMW_OPS = [
@@ -230,7 +233,7 @@
     for OptFlag in ['-O0', '-O1']:
         f.write(' '.join([
            ';', 'RUN:', 'llc', '%s', '-o', '-', '-verify-machineinstrs',
-           f'-mtriple={triple}', f'-mattr=+{feat.mattr}', OptFlag, '|',
+           f'-mtriple={triple}', f'-mattr={feat.mattr}', OptFlag, '|',
            'FileCheck', '%s', f'--check-prefixes=CHECK,{OptFlag}\n'
         ]))
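
For reference, a minimal usage sketch of what this patch enables; it is not part of the patch itself. The function names below are illustrative, and the expected assembly is taken from the little-endian tests above (llc -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1): a 16-byte-aligned i128 acquire load and release store now compile directly to the single-copy-atomic LDIAPP/STILP pair instructions instead of an LDXP/STXP loop.

; Illustrative only; names are hypothetical.
define i128 @example_acquire_load(ptr %p) {
  ; expected: ldiapp x0, x1, [x0]
  %v = load atomic i128, ptr %p acquire, align 16
  ret i128 %v
}

define void @example_release_store(i128 %v, ptr %p) {
  ; expected: stilp x0, x1, [x2]
  store atomic i128 %v, ptr %p release, align 16
  ret void
}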