diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -821,6 +821,14 @@ #undef LCALLNAME5 } + if (Subtarget->hasLSE128()) { + // Custom lowering because i128 is not legal. Must be replaced by 2x64 + // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP. + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom); + } + // 128-bit loads and stores can be done without expanding setOperationAction(ISD::LOAD, MVT::i128, Custom); setOperationAction(ISD::STORE, MVT::i128, Custom); @@ -13301,12 +13309,14 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>(); + // No point replacing if we don't have the relevant instruction/libcall anyway if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); // LSE has an atomic load-clear instruction, but not a load-and. SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); + assert(VT != MVT::i128 && "Handled elsewhere, code replicated."); SDValue RHS = Op.getOperand(2); AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode()); RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS); @@ -22082,6 +22092,137 @@ Results.push_back(SDValue(CmpSwap, 3)); } +static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, + AtomicOrdering Ordering) { + // ATOMIC_LOAD_CLR only appears when lowering ATOMIC_LOAD_AND (see + // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because + // the type is not legal. Therefore we shouldn't expect to see a 128-bit + // ATOMIC_LOAD_CLR at any point.
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_CLR && + "ATOMIC_LOAD_AND should be lowered to LDCLRP directly"); + assert(ISDOpcode != ISD::ATOMIC_LOAD_ADD && "There is no 128 bit LDADD"); + assert(ISDOpcode != ISD::ATOMIC_LOAD_SUB && "There is no 128 bit LDSUB"); + + if (ISDOpcode == ISD::ATOMIC_LOAD_AND) { + // The operand will need to be XORed in a separate step. + switch (Ordering) { + case AtomicOrdering::Monotonic: + return AArch64::LDCLRP; + break; + case AtomicOrdering::Acquire: + return AArch64::LDCLRPA; + break; + case AtomicOrdering::Release: + return AArch64::LDCLRPL; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return AArch64::LDCLRPAL; + break; + default: + llvm_unreachable("Unexpected ordering!"); + } + } + + if (ISDOpcode == ISD::ATOMIC_LOAD_OR) { + switch (Ordering) { + case AtomicOrdering::Monotonic: + return AArch64::LDSETP; + break; + case AtomicOrdering::Acquire: + return AArch64::LDSETPA; + break; + case AtomicOrdering::Release: + return AArch64::LDSETPL; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return AArch64::LDSETPAL; + break; + default: + llvm_unreachable("Unexpected ordering!"); + } + } + + if (ISDOpcode == ISD::ATOMIC_SWAP) { + switch (Ordering) { + case AtomicOrdering::Monotonic: + return AArch64::SWPP; + break; + case AtomicOrdering::Acquire: + return AArch64::SWPPA; + break; + case AtomicOrdering::Release: + return AArch64::SWPPL; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return AArch64::SWPPAL; + break; + default: + llvm_unreachable("Unexpected ordering!"); + } + } + + llvm_unreachable("Unexpected ISDOpcode!"); +} + +static void ReplaceATOMIC_LOAD_128Results(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it + // here.
This follows the approach of the CMP_SWAP_XXX pseudo instructions + rather than the CASP instructions, because CASP has register classes for + the pairs of registers and therefore uses REG_SEQUENCE and EXTRACT_SUBREG + to present them as single operands. LSE128 instructions use the GPR64 + register class (because the pair does not have to be sequential), like + CMP_SWAP_XXX, and therefore we use TRUNCATE and BUILD_PAIR. + + assert(N->getValueType(0) == MVT::i128 && + "AtomicLoadXXX on types less than 128 should be legal"); + + if (!Subtarget->hasLSE128()) + return; + + MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); + const SDValue &Chain = N->getOperand(0); + const SDValue &Ptr = N->getOperand(1); + const SDValue &Val128 = N->getOperand(2); + std::pair<SDValue, SDValue> Val2x64 = splitInt128(Val128, DAG); + + const unsigned ISDOpcode = N->getOpcode(); + const unsigned MachineOpcode = + getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering()); + + if (ISDOpcode == ISD::ATOMIC_LOAD_AND) { + SDLoc dl(Val128); + Val2x64.first = + DAG.getNode(ISD::XOR, dl, MVT::i64, + DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first); + Val2x64.second = + DAG.getNode(ISD::XOR, dl, MVT::i64, + DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second); + } + + SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain}; + if (DAG.getDataLayout().isBigEndian()) + std::swap(Ops[0], Ops[1]); + + MachineSDNode *AtomicInst = + DAG.getMachineNode(MachineOpcode, SDLoc(N), + DAG.getVTList(MVT::i64, MVT::i64, MVT::Other), Ops); + + DAG.setNodeMemRefs(AtomicInst, {MemOp}); + + SDValue Lo = SDValue(AtomicInst, 0), Hi = SDValue(AtomicInst, 1); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi)); + Results.push_back(SDValue(AtomicInst, 2)); // Chain out +} + void AArch64TargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { @@ -22135,6 
+22276,20 @@ case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget); return; + case ISD::ATOMIC_LOAD_CLR: + assert(N->getValueType(0) != MVT::i128 && + "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP"); + break; + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_SWAP: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + assert(AN->getVal().getValueType() == MVT::i128 && + "Expected 128-bit atomicrmw."); + // These need custom type legalisation so we go directly to instruction. + ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget); + return; + } case ISD::ATOMIC_LOAD: case ISD::LOAD: { MemSDNode *LoadNode = cast<MemSDNode>(N); @@ -22368,6 +22523,13 @@ unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; + bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 && + (AI->getOperation() == AtomicRMWInst::Xchg || + AI->getOperation() == AtomicRMWInst::Or || + AI->getOperation() == AtomicRMWInst::And); + if (CanUseLSE128) + return AtomicExpansionKind::None; + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. 
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll @@ -116,69 +116,29 @@ } define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_unordered: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 -; -; -O1-LABEL: store_atomic_i128_aligned_unordered: -; -O1: ldp x4, x5, [x2] -; -O1: casp x6, x7, x0, x1, [x2] -; -O1: cmp x7, x5 -; -O1: ccmp x6, x4, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_unordered: +; CHECK: swpp x0, x1, [x2] store atomic i128 %value, ptr %ptr unordered, align 16 ret void } define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_monotonic: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 -; -; -O1-LABEL: store_atomic_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x2] -; -O1: casp x6, x7, x0, x1, [x2] -; -O1: cmp x7, x5 -; -O1: ccmp x6, x4, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_monotonic: +; CHECK: swpp x0, x1, [x2] store atomic i128 %value, ptr %ptr monotonic, align 16 ret void } define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_release: -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 -; -; -O1-LABEL: store_atomic_i128_aligned_release: -; -O1: ldp x4, x5, [x2] -; -O1: caspl x6, x7, x0, x1, [x2] -; -O1: cmp x7, x5 -; -O1: ccmp x6, x4, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_release: +; 
CHECK: swppl x0, x1, [x2] store atomic i128 %value, ptr %ptr release, align 16 ret void } define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_seq_cst: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 -; -; -O1-LABEL: store_atomic_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x2] -; -O1: caspal x6, x7, x0, x1, [x2] -; -O1: cmp x7, x5 -; -O1: ccmp x6, x4, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_seq_cst: +; CHECK: swppal x0, x1, [x2] store atomic i128 %value, ptr %ptr seq_cst, align 16 ret void } @@ -322,3 +282,6 @@ store atomic i128 %value, ptr %ptr seq_cst, align 1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; -O0: {{.*}} +; -O1: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll @@ -145,85 +145,50 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: swpp x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: casp x4, x5, x2, x3, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: swpp x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire: -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; 
-O0: swppa x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: caspa x4, x5, x2, x3, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: swppa x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_release: -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: swppl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release: -; -O1: ldp x4, x5, [x0] -; -O1: caspl x4, x5, x2, x3, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: swppl x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: swppal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; -O1: caspal x4, x5, x2, x3, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: swppal x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: swppal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: caspal x4, x5, x2, x3, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: swppal x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -1575,105 +1540,70 @@ define dso_local i128 
@atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_monotonic: -; -O0: and x2, x9, x11 -; -O0: and x9, x9, x10 -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrp x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: casp x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrp x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_acquire: -; -O0: and x2, x9, x11 -; -O0: and x9, x9, x10 -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpa x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspa x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpa x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_release: -; -O0: and x2, x9, x11 -; -O0: and x9, x9, x10 -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspl x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, 
x6, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpl x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_acq_rel: -; -O0: and x2, x9, x11 -; -O0: and x9, x9, x10 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpal x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_seq_cst: -; -O0: and x2, x9, x11 -; -O0: and x9, x9, x10 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpal x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2955,105 +2885,50 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_monotonic: -; -O0: orr x2, x9, x11 -; -O0: orr x9, x9, x10 -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: ldsetp x0, x1, [x8] ; ; -O1-LABEL: 
atomicrmw_or_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: casp x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: ldsetp x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_acquire: -; -O0: orr x2, x9, x11 -; -O0: orr x9, x9, x10 -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: ldsetpa x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspa x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: ldsetpa x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_release: -; -O0: orr x2, x9, x11 -; -O0: orr x9, x9, x10 -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: ldsetpl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspl x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: ldsetpl x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_acq_rel: -; -O0: orr x2, x9, x11 -; -O0: orr x9, x9, x10 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: ldsetpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; 
-O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: ldsetpal x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_seq_cst: -; -O0: orr x2, x9, x11 -; -O0: orr x9, x9, x10 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: eor x8, x10, x8 -; -O0: eor x11, x9, x11 -; -O0: orr x8, x8, x11 -; -O0: subs x8, x8, #0 +; -O0: ldsetpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x5, x7 -; -O1: ccmp x4, x6, #0, eq +; -O1: ldsetpal x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll @@ -116,61 +116,29 @@ } define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_unordered: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq -; -; -O1-LABEL: store_atomic_i128_aligned_unordered: -; -O1: ldp x4, x5, [x2] -; -O1: casp x6, x7, x0, x1, [x2] -; -O1: cmp x6, x4 -; -O1: ccmp x7, x5, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_unordered: +; CHECK: swpp x0, x1, [x2] store atomic i128 %value, ptr %ptr unordered, align 16 ret void } define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_monotonic: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq -; -; -O1-LABEL: 
store_atomic_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x2] -; -O1: casp x6, x7, x0, x1, [x2] -; -O1: cmp x6, x4 -; -O1: ccmp x7, x5, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_monotonic: +; CHECK: swpp x0, x1, [x2] store atomic i128 %value, ptr %ptr monotonic, align 16 ret void } define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_release: -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq -; -; -O1-LABEL: store_atomic_i128_aligned_release: -; -O1: ldp x4, x5, [x2] -; -O1: caspl x6, x7, x0, x1, [x2] -; -O1: cmp x6, x4 -; -O1: ccmp x7, x5, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_release: +; CHECK: swppl x0, x1, [x2] store atomic i128 %value, ptr %ptr release, align 16 ret void } define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) { -; -O0-LABEL: store_atomic_i128_aligned_seq_cst: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq -; -; -O1-LABEL: store_atomic_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x2] -; -O1: caspal x6, x7, x0, x1, [x2] -; -O1: cmp x6, x4 -; -O1: ccmp x7, x5, #0, eq +; CHECK-LABEL: store_atomic_i128_aligned_seq_cst: +; CHECK: swppal x0, x1, [x2] store atomic i128 %value, ptr %ptr seq_cst, align 16 ret void } @@ -314,3 +282,6 @@ store atomic i128 %value, ptr %ptr seq_cst, align 1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; -O0: {{.*}} +; -O1: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll @@ -145,75 +145,50 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic: -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: swpp x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: casp x4, x5, x2, x3, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: swpp x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire: -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: swppa x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: caspa x4, x5, x2, x3, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: swppa x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_release: -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: swppl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release: -; -O1: ldp x4, x5, [x0] -; -O1: caspl x4, x5, x2, x3, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: swppl x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; 
-O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: swppal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; -O1: caspal x4, x5, x2, x3, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: swppal x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst: -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: swppal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: caspal x4, x5, x2, x3, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: swppal x2, x1, [x0] %r = atomicrmw xchg ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -1665,95 +1640,70 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_monotonic: -; -O0: and x2, x11, x12 -; -O0: and x9, x10, x9 -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrp x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: casp x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrp x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_acquire: -; -O0: and x2, x11, x12 -; -O0: and x9, x10, x9 -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpa x0, x1, [x8] ; ; 
-O1-LABEL: atomicrmw_and_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspa x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpa x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_release: -; -O0: and x2, x11, x12 -; -O0: and x9, x10, x9 -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspl x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpl x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_acq_rel: -; -O0: and x2, x11, x12 -; -O0: and x9, x10, x9 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: mvn x1, x3 +; -O0: mvn x0, x2 +; -O0: ldclrpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpal x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_and_i128_aligned_seq_cst: -; -O0: and x2, x11, x12 -; -O0: and x9, x10, x9 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: mvn x1, x3 
+; -O0: mvn x0, x2 +; -O0: ldclrpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: mvn x1, x3 +; -O1: mvn x8, x2 +; -O1: ldclrpal x8, x1, [x0] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -3040,95 +2990,50 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_monotonic: -; -O0: orr x2, x11, x12 -; -O0: orr x9, x10, x9 -; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: ldsetp x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: casp x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: ldsetp x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_acquire: -; -O0: orr x2, x11, x12 -; -O0: orr x9, x10, x9 -; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: ldsetpa x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspa x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: ldsetpa x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_release: -; -O0: orr x2, x11, x12 -; -O0: orr x9, x10, x9 -; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: ldsetpl x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: -; -O1: ldp x4, x5, [x0] 
-; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspl x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: ldsetpl x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_acq_rel: -; -O0: orr x2, x11, x12 -; -O0: orr x9, x10, x9 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: ldsetpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: ldsetpal x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_or_i128_aligned_seq_cst: -; -O0: orr x2, x11, x12 -; -O0: orr x9, x10, x9 -; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x9, x11 -; -O0: ccmp x8, x10, #0, eq +; -O0: ldsetpal x0, x1, [x8] ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: -; -O1: ldp x4, x5, [x0] -; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 -; -O1: caspal x4, x5, x8, x9, [x0] -; -O1: cmp x4, x6 -; -O1: ccmp x5, x7, #0, eq +; -O1: ldsetpal x2, x1, [x0] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r }