diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -59,6 +59,9 @@ bool expandAtomicCmpXchg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, int Width, MachineBasicBlock::iterator &NextMBBI); + bool expandAMOCAS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool IsPaired, int Width, + MachineBasicBlock::iterator &NextMBBI); #ifndef NDEBUG unsigned getInstSizeInBytes(const MachineFunction &MF) const { unsigned Size = 0; @@ -145,6 +148,14 @@ return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI); case RISCV::PseudoMaskedCmpXchg32: return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI); + case RISCV::PseudoAMOCAS_W: + return expandAMOCAS(MBB, MBBI, false, 32, NextMBBI); + case RISCV::PseudoAMOCAS_D_64: + return expandAMOCAS(MBB, MBBI, false, 64, NextMBBI); + case RISCV::PseudoAMOCAS_D_32: + return expandAMOCAS(MBB, MBBI, true, 64, NextMBBI); + case RISCV::PseudoAMOCAS_Q: + return expandAMOCAS(MBB, MBBI, true, 128, NextMBBI); } return false; @@ -256,6 +267,74 @@ llvm_unreachable("Unexpected SC width\n"); } +static unsigned getAMOCASForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_W; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_W; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_W_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_W_RL; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_W_AQ_RL; + } +} + +static unsigned getAMOCASForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_D; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_D; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_D_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_D_RL; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_D_AQ_RL; + } +} + +static unsigned getAMOCASForRMW128(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { + if (Subtarget->hasStdExtZtso()) + return RISCV::AMOCAS_Q; + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::AMOCAS_Q; + case AtomicOrdering::Acquire: + return RISCV::AMOCAS_Q_AQ; + case AtomicOrdering::Release: + return RISCV::AMOCAS_Q_RL; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + return RISCV::AMOCAS_Q_AQ_RL; + } +} + +static unsigned getAMOCASForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { + if (Width == 32) + return getAMOCASForRMW32(Ordering, Subtarget); + if (Width == 64) + return getAMOCASForRMW64(Ordering, Subtarget); + if (Width == 128) + return getAMOCASForRMW128(Ordering, Subtarget); + llvm_unreachable("Unexpected AMOCAS width\n"); +} + static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, @@ -728,6 +807,72 @@ return true; } +static Register getGPRPairEvenReg(Register PairedReg) { + switch (PairedReg) { + case RISCV::X0_PD: + return RISCV::X0; + case RISCV::X2_PD: + return RISCV::X2; + case RISCV::X4_PD: + return RISCV::X4; + case RISCV::X6_PD: + return RISCV::X6; + case RISCV::X8_PD: + return RISCV::X8; + case RISCV::X10_PD: + return RISCV::X10; + case RISCV::X12_PD: + return RISCV::X12; + case RISCV::X14_PD: + return RISCV::X14; + case RISCV::X16_PD: + return RISCV::X16; + case RISCV::X18_PD: + return RISCV::X18; + case RISCV::X20_PD: + return RISCV::X20; + case RISCV::X22_PD: + return RISCV::X22; + case RISCV::X24_PD: + return RISCV::X24; + case RISCV::X26_PD: + return RISCV::X26; + case RISCV::X28_PD: + return RISCV::X28; + case RISCV::X30_PD: + return RISCV::X30; + default: + llvm_unreachable("Unexpected GPR pair"); + } +} + +bool RISCVExpandAtomicPseudo::expandAMOCAS( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsPaired, + int Width, MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + if (IsPaired) + DestReg = getGPRPairEvenReg(DestReg); + Register AddrReg = MI.getOperand(1).getReg(); + Register NewValReg = MI.getOperand(3).getReg(); + if (IsPaired) + NewValReg = getGPRPairEvenReg(NewValReg); + AtomicOrdering Ordering = + static_cast(MI.getOperand(4).getImm()); + + MachineInstr *NewMI = + BuildMI(MBB, MBBI, DL, TII->get(getAMOCASForRMW(Ordering, Width, STI))) + .addReg(DestReg) + .addReg(AddrReg) + .addReg(NewValReg); + NewMI->getOperand(0).setIsDef(true); + + MI.eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -550,7 +550,10 @@ } if (Subtarget.hasStdExtA()) { - setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); + if (Subtarget.hasStdExtZacas()) + setMaxAtomicSizeInBitsSupported(Subtarget.getXLen() * 2); + else + setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); setMinCmpXchgSizeInBits(32); } else if (Subtarget.hasForcedAtomics()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); @@ -1236,6 +1239,12 @@ XLenVT, Expand); } + // Set atomic_cmp_swap operations to expand to AMOCAS.D (RV32) and AMOCAS.Q + // (RV64). + if (Subtarget.hasStdExtZacas()) + setOperationAction(ISD::ATOMIC_CMP_SWAP, + Subtarget.is64Bit() ? MVT::i128 : MVT::i64, Custom); + if (Subtarget.hasVendorXTHeadMemIdx()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC; ++im) { @@ -10148,6 +10157,63 @@ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); } +// Create an even/odd pair of X registers holding integer value V. +static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V, MVT VT, + MVT SubRegVT) { + SDLoc DL(V.getNode()); + SDValue VLo = DAG.getAnyExtOrTrunc(V, DL, SubRegVT); + SDValue VHi = DAG.getAnyExtOrTrunc( + DAG.getNode( + ISD::SRL, DL, VT, V, + DAG.getConstant(SubRegVT == MVT::i64 ? 64 : 32, DL, SubRegVT)), + DL, SubRegVT); + SDValue RegClass = DAG.getTargetConstant( + VT == MVT::i128 ? RISCV::GPRPI128RegClassID : RISCV::GPRPI64RegClassID, + DL, MVT::i32); + SDValue SubReg0 = DAG.getTargetConstant(RISCV::sub_32, DL, MVT::i32); + SDValue SubReg1 = DAG.getTargetConstant(RISCV::sub_32_hi, DL, MVT::i32); + const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1}; + return SDValue( + DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0); +} + +static void ReplaceCMP_SWAP_2XLenResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = N->getSimpleValueType(0); + assert(N->getValueType(0) == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && + "AtomicCmpSwap on types less than 2*XLen should be legal"); + assert(Subtarget.hasStdExtZacas()); + MVT SubRegVT = (VT == MVT::i64 ? MVT::i32 : MVT::i64); + + SDLoc DL(N); + MachineMemOperand *MemOp = cast(N)->getMemOperand(); + AtomicOrdering Ordering = MemOp->getMergedOrdering(); + SDValue Ops[] = { + N->getOperand(1), // Ptr + createGPRPairNode(DAG, N->getOperand(2), VT, SubRegVT), // Compare value + createGPRPairNode(DAG, N->getOperand(3), VT, SubRegVT), // Store value + DAG.getTargetConstant(static_cast(Ordering), DL, + MVT::i32), // Ordering + N->getOperand(0), // Chain in + }; + + unsigned Opcode = + (VT == MVT::i64 ? RISCV::PseudoAMOCAS_D_32 : RISCV::PseudoAMOCAS_Q); + MachineSDNode *CmpSwap = DAG.getMachineNode( + Opcode, DL, DAG.getVTList(MVT::Untyped, MVT::Other), Ops); + DAG.setNodeMemRefs(CmpSwap, {MemOp}); + + unsigned SubReg1 = RISCV::sub_32, SubReg2 = RISCV::sub_32_hi; + SDValue Lo = + DAG.getTargetExtractSubreg(SubReg1, DL, SubRegVT, SDValue(CmpSwap, 0)); + SDValue Hi = + DAG.getTargetExtractSubreg(SubReg2, DL, SubRegVT, SDValue(CmpSwap, 0)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, VT, Lo, Hi)); + Results.push_back(SDValue(CmpSwap, 1)); +} + void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { @@ -10155,6 +10221,10 @@ switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom type legalize this operation!"); + case ISD::ATOMIC_CMP_SWAP: { + ReplaceCMP_SWAP_2XLenResults(N, Results, DAG, Subtarget); + break; + } case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -311,6 +311,28 @@ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } +let Predicates = [HasStdExtZacas] in { +class PseudoAMOCAS + : Pseudo<(outs RC:$res), + (ins GPR:$addr, RC:$cmpval, RC:$newval, ixlenimm:$ordering), []> { + let Constraints = "@earlyclobber $res, $res = $cmpval"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} +def PseudoAMOCAS_W: PseudoAMOCAS; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoAMOCAS_W>; + +let Predicates = [HasStdExtZacas, IsRV32] in { + def PseudoAMOCAS_D_32: PseudoAMOCAS; +} +let Predicates = [HasStdExtZacas, IsRV64] in { + def PseudoAMOCAS_D_64: PseudoAMOCAS; + defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoAMOCAS_D_64>; + def PseudoAMOCAS_Q: PseudoAMOCAS; +} +} + def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -577,6 +577,26 @@ X0_PD, X2_PD, X4_PD )>; +let RegInfos = RegInfoByHwMode<[RV32], [RegInfo<32, 32, 32>]> in +def GPRPI64 : RegisterClass<"RISCV", [i64], 32, (add + X10_PD, X12_PD, X14_PD, X16_PD, + X6_PD, + X28_PD, X30_PD, + X8_PD, + X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, + X0_PD, X2_PD, X4_PD +)>; + +let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in +def GPRPI128 : RegisterClass<"RISCV", [i128], 64, (add + X10_PD, X12_PD, X14_PD, X16_PD, + X6_PD, + X28_PD, X30_PD, + X8_PD, + X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, + X0_PD, X2_PD, X4_PD +)>; + // The register class is added for inline assembly for vector mask types. def VM : VRegThis Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_3 Depth 2 -; CHECK-NEXT: .LBB0_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a3, (a0) -; CHECK-NEXT: bne a3, a1, .LBB0_1 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2 -; CHECK-NEXT: sc.w.rl a4, a2, (a0) -; CHECK-NEXT: bnez a4, .LBB0_3 -; CHECK-NEXT: # %bb.5: # %do_cmpxchg -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_branch1: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB0_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV32IA-NEXT: .LBB0_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB0_1 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB0_3 +; RV32IA-NEXT: # %bb.5: # %do_cmpxchg +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_and_branch1: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a3, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV32IA-ZACAS-NEXT: bne a3, a1, .LBB0_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_branch1: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB0_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV64IA-NEXT: .LBB0_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: bne a3, a1, .LBB0_1 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-NEXT: bnez a4, .LBB0_3 +; RV64IA-NEXT: # %bb.5: # %do_cmpxchg +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_branch1: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB0_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -36,25 +78,65 @@ } define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind { -; CHECK-LABEL: cmpxchg_and_branch2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB1_1: # %do_cmpxchg -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_3 Depth 2 -; CHECK-NEXT: .LBB1_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a3, (a0) -; CHECK-NEXT: bne a3, a1, .LBB1_5 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB1_3 Depth=2 -; CHECK-NEXT: sc.w.rl a4, a2, (a0) -; CHECK-NEXT: bnez a4, .LBB1_3 -; CHECK-NEXT: .LBB1_5: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: beq a3, a1, .LBB1_1 -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_branch2: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB1_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV32IA-NEXT: .LBB1_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB1_5 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB1_3 +; RV32IA-NEXT: .LBB1_5: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-NEXT: beq a3, a1, .LBB1_1 +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_and_branch2: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a3, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV32IA-ZACAS-NEXT: beq a3, a1, .LBB1_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_branch2: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB1_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV64IA-NEXT: .LBB1_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-NEXT: bne a3, a1, .LBB1_5 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-NEXT: bnez a4, .LBB1_3 +; RV64IA-NEXT: .LBB1_5: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-NEXT: beq a3, a1, .LBB1_1 +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_branch2: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: beq a3, a1, .LBB1_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -96,6 +178,36 @@ ; RV32IA-NEXT: # %bb.2: # %exit ; RV32IA-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: sll a0, a0, a4 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a2, a2, a4 +; RV32IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2 +; RV32IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 +; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 +; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a2 +; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_3 +; RV32IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; ; RV64IA-LABEL: cmpxchg_masked_and_branch1: ; RV64IA: # %bb.0: # %entry ; RV64IA-NEXT: andi a3, a0, -4 @@ -125,6 +237,36 @@ ; RV64IA-NEXT: # %bb.5: # %do_cmpxchg ; RV64IA-NEXT: # %bb.2: # %exit ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 +; RV64IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: # Child Loop BB2_3 Depth 2 +; RV64IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 +; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 +; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a2 +; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_3 +; RV64IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -169,6 +311,39 @@ ; RV32IA-NEXT: # %bb.2: # %exit ; RV32IA-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: sll a0, a0, a4 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a2, a2, a4 +; RV32IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2 +; RV32IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 +; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 +; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a2 +; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_3 +; RV32IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-ZACAS-NEXT: and a4, a4, a0 +; RV32IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; ; RV64IA-LABEL: cmpxchg_masked_and_branch2: ; RV64IA: # %bb.0: # %entry ; RV64IA-NEXT: andi a3, a0, -4 @@ -201,6 +376,39 @@ ; RV64IA-NEXT: beq a1, a4, .LBB3_1 ; RV64IA-NEXT: # %bb.2: # %exit ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 +; RV64IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: # Child Loop BB3_3 Depth 2 +; RV64IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 +; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a2 +; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_3 +; RV64IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: and a4, a4, a0 +; RV64IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: @@ -212,25 +420,65 @@ } define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signext %val, i1 zeroext %bool) nounwind { -; CHECK-LABEL: cmpxchg_and_irrelevant_branch: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB4_1: # %do_cmpxchg -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB4_3 Depth 2 -; CHECK-NEXT: .LBB4_3: # %do_cmpxchg -; CHECK-NEXT: # Parent Loop BB4_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lr.w.aqrl a4, (a0) -; CHECK-NEXT: bne a4, a1, .LBB4_5 -; CHECK-NEXT: # %bb.4: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB4_3 Depth=2 -; CHECK-NEXT: sc.w.rl a5, a2, (a0) -; CHECK-NEXT: bnez a5, .LBB4_3 -; CHECK-NEXT: .LBB4_5: # %do_cmpxchg -; CHECK-NEXT: # in Loop: Header=BB4_1 Depth=1 -; CHECK-NEXT: beqz a3, .LBB4_1 -; CHECK-NEXT: # %bb.2: # %exit -; CHECK-NEXT: ret +; RV32IA-LABEL: cmpxchg_and_irrelevant_branch: +; RV32IA: # %bb.0: # %entry +; RV32IA-NEXT: .LBB4_1: # %do_cmpxchg +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB4_3 Depth 2 +; RV32IA-NEXT: .LBB4_3: # %do_cmpxchg +; RV32IA-NEXT: # Parent Loop BB4_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: bne a4, a1, .LBB4_5 +; RV32IA-NEXT: # %bb.4: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB4_3 Depth=2 +; RV32IA-NEXT: sc.w.rl a5, a2, (a0) +; RV32IA-NEXT: bnez a5, .LBB4_3 +; RV32IA-NEXT: .LBB4_5: # %do_cmpxchg +; RV32IA-NEXT: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-NEXT: beqz a3, .LBB4_1 +; RV32IA-NEXT: # %bb.2: # %exit +; RV32IA-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch: +; RV32IA-ZACAS: # %bb.0: # %entry +; RV32IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg +; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: mv a4, a1 +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) +; RV32IA-ZACAS-NEXT: beqz a3, .LBB4_1 +; RV32IA-ZACAS-NEXT: # %bb.2: # %exit +; RV32IA-ZACAS-NEXT: ret +; +; RV64IA-LABEL: cmpxchg_and_irrelevant_branch: +; RV64IA: # %bb.0: # %entry +; RV64IA-NEXT: .LBB4_1: # %do_cmpxchg +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB4_3 Depth 2 +; RV64IA-NEXT: .LBB4_3: # %do_cmpxchg +; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-NEXT: bne a4, a1, .LBB4_5 +; RV64IA-NEXT: # %bb.4: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2 +; RV64IA-NEXT: sc.w.rl a5, a2, (a0) +; RV64IA-NEXT: bnez a5, .LBB4_3 +; RV64IA-NEXT: .LBB4_5: # %do_cmpxchg +; RV64IA-NEXT: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-NEXT: beqz a3, .LBB4_1 +; RV64IA-NEXT: # %bb.2: # %exit +; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_and_irrelevant_branch: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: .LBB4_1: # %do_cmpxchg +; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: mv a4, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) +; RV64IA-ZACAS-NEXT: beqz a3, .LBB4_1 +; RV64IA-ZACAS-NEXT: # %bb.2: # %exit +; RV64IA-ZACAS-NEXT: ret entry: br label %do_cmpxchg do_cmpxchg: diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IA-ZACAS %s ; This test ensures that the output of the 'lr.w' instruction is sign-extended. ; Previously, the default zero-extension was being used and 'cmp' parameter @@ -21,6 +23,14 @@ ; RV64IA-NEXT: xor a1, a3, a1 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: # %entry +; RV64IA-ZACAS-NEXT: mv a3, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) +; RV64IA-ZACAS-NEXT: xor a1, a3, a1 +; RV64IA-ZACAS-NEXT: seqz a0, a1 +; RV64IA-ZACAS-NEXT: ret i32 signext %val) nounwind { entry: %0 = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -3,12 +3,16 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s @@ -125,6 +129,29 @@ ; RV32IA-WMO-NEXT: .LBB1_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-ZACAS-NEXT: .LBB1_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -184,6 +211,29 @@ ; RV64IA-WMO-NEXT: .LBB1_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV64IA-ZACAS-NEXT: .LBB1_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -206,6 +256,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 ; RV64IA-TSO-NEXT: .LBB1_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic ret void } @@ -247,6 +341,29 @@ ; RV32IA-WMO-NEXT: .LBB2_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-ZACAS-NEXT: .LBB2_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -306,6 +423,29 @@ ; RV64IA-WMO-NEXT: .LBB2_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-ZACAS-NEXT: .LBB2_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -328,6 +468,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 ; RV64IA-TSO-NEXT: .LBB2_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire ret void } @@ -369,6 +553,29 @@ ; RV32IA-WMO-NEXT: .LBB3_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-ZACAS-NEXT: .LBB3_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -428,6 +635,29 @@ ; RV64IA-WMO-NEXT: .LBB3_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-ZACAS-NEXT: .LBB3_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -450,6 +680,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 ; RV64IA-TSO-NEXT: .LBB3_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic ret void } @@ -491,6 +765,29 @@ ; RV32IA-WMO-NEXT: .LBB4_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-ZACAS-NEXT: .LBB4_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -550,6 +847,29 @@ ; RV64IA-WMO-NEXT: .LBB4_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV64IA-ZACAS-NEXT: .LBB4_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -572,6 +892,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB4_1 ; RV64IA-TSO-NEXT: .LBB4_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire ret void } @@ -613,6 +977,29 @@ ; RV32IA-WMO-NEXT: .LBB5_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-ZACAS-NEXT: .LBB5_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -672,6 +1059,29 @@ ; RV64IA-WMO-NEXT: .LBB5_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-ZACAS-NEXT: .LBB5_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -694,6 +1104,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB5_1 ; RV64IA-TSO-NEXT: .LBB5_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic ret void } @@ -735,6 +1189,29 @@ ; RV32IA-WMO-NEXT: .LBB6_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: li a4, 255 +; RV32IA-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV32IA-ZACAS-NEXT: .LBB6_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -794,6 +1271,29 @@ ; RV64IA-WMO-NEXT: .LBB6_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-ZACAS-NEXT: .LBB6_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -816,6 +1316,50 @@ ; RV64IA-TSO-NEXT: bnez a5, .LBB6_1 ; RV64IA-TSO-NEXT: .LBB6_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a1, a1, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a2, a2, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire ret void } @@ -1164,6 +1708,30 @@ ; RV32IA-WMO-NEXT: .LBB11_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV32IA-ZACAS-NEXT: .LBB11_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1225,6 +1793,30 @@ ; RV64IA-WMO-NEXT: .LBB11_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV64IA-ZACAS-NEXT: .LBB11_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1248,6 +1840,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-NEXT: .LBB11_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ret void } @@ -1290,6 +1928,30 @@ ; RV32IA-WMO-NEXT: .LBB12_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV32IA-ZACAS-NEXT: .LBB12_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1351,6 +2013,30 @@ ; RV64IA-WMO-NEXT: .LBB12_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV64IA-ZACAS-NEXT: .LBB12_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1374,6 +2060,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-NEXT: .LBB12_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire ret void } @@ -1416,6 +2148,30 @@ ; RV32IA-WMO-NEXT: .LBB13_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV32IA-ZACAS-NEXT: .LBB13_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1477,6 +2233,30 @@ ; RV64IA-WMO-NEXT: .LBB13_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV64IA-ZACAS-NEXT: .LBB13_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1500,6 +2280,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-NEXT: .LBB13_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic ret void } @@ -1542,6 +2368,30 @@ ; RV32IA-WMO-NEXT: .LBB14_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV32IA-ZACAS-NEXT: .LBB14_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1603,6 +2453,30 @@ ; RV64IA-WMO-NEXT: .LBB14_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV64IA-ZACAS-NEXT: .LBB14_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1626,6 +2500,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-NEXT: .LBB14_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire ret void } @@ -1668,6 +2588,30 @@ ; RV32IA-WMO-NEXT: .LBB15_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV32IA-ZACAS-NEXT: .LBB15_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1729,6 +2673,30 @@ ; RV64IA-WMO-NEXT: .LBB15_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV64IA-ZACAS-NEXT: .LBB15_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1752,6 +2720,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-NEXT: .LBB15_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic ret void } @@ -1794,6 +2808,30 @@ ; RV32IA-WMO-NEXT: .LBB16_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: lui a4, 16 +; RV32IA-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV32IA-ZACAS-NEXT: .LBB16_3: +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: andi a3, a0, -4 @@ -1855,6 +2893,30 @@ ; RV64IA-WMO-NEXT: .LBB16_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: lui a4, 16 +; RV64IA-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV64IA-ZACAS-NEXT: .LBB16_3: +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: andi a3, a0, -4 @@ -1878,6 +2940,52 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-NEXT: .LBB16_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 +; RV64IA-WMO-ZACAS-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire ret void } @@ -2130,16 +3238,32 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_monotonic_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB20_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -; RV32IA-NEXT: sc.w a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB20_1 -; RV32IA-NEXT: .LBB20_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV32IA-WMO-NEXT: .LBB20_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV32IA-TSO-NEXT: .LBB20_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic: ; RV64I: # %bb.0: @@ -2154,17 +3278,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_monotonic_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB20_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -; RV64IA-NEXT: sc.w a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB20_1 -; RV64IA-NEXT: .LBB20_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV64IA-WMO-NEXT: .LBB20_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV64IA-TSO-NEXT: .LBB20_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ret void } @@ -2194,6 +3345,11 @@ ; RV32IA-WMO-NEXT: .LBB21_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 @@ -2230,6 +3386,12 @@ ; RV64IA-WMO-NEXT: .LBB21_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2241,6 +3403,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB21_1 ; RV64IA-TSO-NEXT: .LBB21_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ret void } @@ -2270,6 +3441,11 @@ ; RV32IA-WMO-NEXT: .LBB22_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 @@ -2306,6 +3482,12 @@ ; RV64IA-WMO-NEXT: .LBB22_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2317,6 +3499,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB22_1 ; RV64IA-TSO-NEXT: .LBB22_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire ret void } @@ -2346,6 +3537,11 @@ ; RV32IA-WMO-NEXT: .LBB23_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 @@ -2382,6 +3578,12 @@ ; RV64IA-WMO-NEXT: .LBB23_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2393,6 +3595,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB23_1 ; RV64IA-TSO-NEXT: .LBB23_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic ret void } @@ -2422,6 +3633,11 @@ ; RV32IA-WMO-NEXT: .LBB24_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 @@ -2458,6 +3674,12 @@ ; RV64IA-WMO-NEXT: .LBB24_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2469,6 +3691,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB24_1 ; RV64IA-TSO-NEXT: .LBB24_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire ret void } @@ -2498,6 +3729,11 @@ ; RV32IA-WMO-NEXT: .LBB25_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 @@ -2534,6 +3770,12 @@ ; RV64IA-WMO-NEXT: .LBB25_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2545,6 +3787,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB25_1 ; RV64IA-TSO-NEXT: .LBB25_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic ret void } @@ -2574,6 +3825,11 @@ ; RV32IA-WMO-NEXT: .LBB26_3: ; RV32IA-WMO-NEXT: ret ; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; ; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 @@ -2610,6 +3866,12 @@ ; RV64IA-WMO-NEXT: .LBB26_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: sext.w a1, a1 @@ -2621,6 +3883,15 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB26_1 ; RV64IA-TSO-NEXT: .LBB26_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire ret void } @@ -2639,16 +3910,32 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB27_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB27_1 -; RV32IA-NEXT: .LBB27_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV32IA-WMO-NEXT: .LBB27_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV32IA-TSO-NEXT: .LBB27_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic: ; RV64I: # %bb.0: @@ -2663,17 +3950,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB27_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB27_1 -; RV64IA-NEXT: .LBB27_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV64IA-WMO-NEXT: .LBB27_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV64IA-TSO-NEXT: .LBB27_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic ret void } @@ -2692,16 +4006,32 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB28_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB28_1 -; RV32IA-NEXT: .LBB28_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV32IA-WMO-NEXT: .LBB28_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV32IA-TSO-NEXT: .LBB28_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire: ; RV64I: # %bb.0: @@ -2716,17 +4046,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB28_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB28_1 -; RV64IA-NEXT: .LBB28_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV64IA-WMO-NEXT: .LBB28_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV64IA-TSO-NEXT: .LBB28_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire ret void } @@ -2745,16 +4102,32 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: -; RV32IA: # %bb.0: -; RV32IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a0) -; RV32IA-NEXT: bne a3, a1, .LBB29_3 -; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a4, a2, (a0) -; RV32IA-NEXT: bnez a4, .LBB29_1 -; RV32IA-NEXT: .LBB29_3: -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB29_1 +; RV32IA-WMO-NEXT: .LBB29_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV32IA-TSO-NEXT: .LBB29_3: +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64I: # %bb.0: @@ -2769,17 +4142,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: -; RV64IA: # %bb.0: -; RV64IA-NEXT: sext.w a1, a1 -; RV64IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB29_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB29_1 -; RV64IA-NEXT: .LBB29_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: sext.w a1, a1 +; RV64IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB29_1 +; RV64IA-WMO-NEXT: .LBB29_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sext.w a1, a1 +; RV64IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV64IA-TSO-NEXT: .LBB29_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: sext.w a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst ret void } @@ -2801,21 +4201,46 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a4 -; RV32IA-NEXT: li a4, 0 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a4 +; RV32IA-WMO-NEXT: li a4, 0 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a4 +; RV32IA-TSO-NEXT: li a4, 0 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic: ; RV64I: # %bb.0: @@ -2830,16 +4255,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_monotonic_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB30_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 -; RV64IA-NEXT: sc.d a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB30_1 -; RV64IA-NEXT: .LBB30_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB30_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB30_1 +; RV64IA-WMO-NEXT: .LBB30_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB30_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB30_1 +; RV64IA-TSO-NEXT: .LBB30_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ret void } @@ -2862,22 +4315,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acquire_monotonic: ; RV64I: # %bb.0: @@ -2903,6 +4382,11 @@ ; RV64IA-WMO-NEXT: .LBB31_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 @@ -2913,6 +4397,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB31_1 ; RV64IA-TSO-NEXT: .LBB31_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ret void } @@ -2935,22 +4431,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acquire_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 2 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 2 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 2 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acquire_acquire: ; RV64I: # %bb.0: @@ -2976,6 +4498,11 @@ ; RV64IA-WMO-NEXT: .LBB32_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 @@ -2986,6 +4513,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB32_1 ; RV64IA-TSO-NEXT: .LBB32_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aq a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire ret void } @@ -3008,22 +4547,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_release_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 3 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.rl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 3 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_release_monotonic: ; RV64I: # %bb.0: @@ -3049,6 +4614,11 @@ ; RV64IA-WMO-NEXT: .LBB33_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 @@ -3059,6 +4629,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB33_1 ; RV64IA-TSO-NEXT: .LBB33_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.rl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic ret void } @@ -3081,22 +4663,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_release_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 3 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 3 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_release_acquire: ; RV64I: # %bb.0: @@ -3122,6 +4730,11 @@ ; RV64IA-WMO-NEXT: .LBB34_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 @@ -3132,6 +4745,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB34_1 ; RV64IA-TSO-NEXT: .LBB34_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire ret void } @@ -3154,22 +4779,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 4 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 4 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 4 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic: ; RV64I: # %bb.0: @@ -3195,6 +4846,11 @@ ; RV64IA-WMO-NEXT: .LBB35_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 @@ -3205,6 +4861,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB35_1 ; RV64IA-TSO-NEXT: .LBB35_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic ret void } @@ -3227,22 +4895,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 4 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 4 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 4 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire: ; RV64I: # %bb.0: @@ -3268,6 +4962,11 @@ ; RV64IA-WMO-NEXT: .LBB36_3: ; RV64IA-WMO-NEXT: ret ; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; ; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 @@ -3278,6 +4977,18 @@ ; RV64IA-TSO-NEXT: bnez a4, .LBB36_1 ; RV64IA-TSO-NEXT: .LBB36_3: ; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire ret void } @@ -3300,22 +5011,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: li a5, 0 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a5, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a5 +; RV32IA-WMO-NEXT: li a5, 0 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a5, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a5 +; RV32IA-TSO-NEXT: li a5, 0 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic: ; RV64I: # %bb.0: @@ -3330,16 +5067,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_monotonic: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB37_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB37_1 -; RV64IA-NEXT: .LBB37_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB37_1 +; RV64IA-WMO-NEXT: .LBB37_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB37_1 +; RV64IA-TSO-NEXT: .LBB37_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic ret void } @@ -3362,22 +5127,48 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: li a5, 2 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: li a5, 2 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire: ; RV64I: # %bb.0: @@ -3392,16 +5183,44 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_acquire: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB38_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB38_1 -; RV64IA-NEXT: .LBB38_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB38_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB38_1 +; RV64IA-WMO-NEXT: .LBB38_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB38_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB38_1 +; RV64IA-TSO-NEXT: .LBB38_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire ret void } @@ -3424,25 +5243,51 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV32IA: # %bb.0: -; RV32IA-NEXT: addi sp, sp, -16 -; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) -; RV32IA-NEXT: sw a1, 0(sp) -; RV32IA-NEXT: mv a1, sp -; RV32IA-NEXT: li a4, 5 -; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a2, a3 -; RV32IA-NEXT: mv a3, a6 -; RV32IA-NEXT: call __atomic_compare_exchange_8@plt -; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IA-NEXT: addi sp, sp, 16 -; RV32IA-NEXT: ret +; RV32IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: addi sp, sp, -16 +; RV32IA-WMO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-WMO-NEXT: mv a6, a4 +; RV32IA-WMO-NEXT: sw a2, 4(sp) +; RV32IA-WMO-NEXT: sw a1, 0(sp) +; RV32IA-WMO-NEXT: mv a1, sp +; RV32IA-WMO-NEXT: li a4, 5 +; RV32IA-WMO-NEXT: li a5, 5 +; RV32IA-WMO-NEXT: mv a2, a3 +; RV32IA-WMO-NEXT: mv a3, a6 +; RV32IA-WMO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-WMO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-WMO-NEXT: addi sp, sp, 16 +; RV32IA-WMO-NEXT: ret ; -; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV64I: # %bb.0: +; RV32IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: mv a5, a4 +; RV32IA-ZACAS-NEXT: mv a7, a2 +; RV32IA-ZACAS-NEXT: mv a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a1 +; RV32IA-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: mv a6, a4 +; RV32IA-TSO-NEXT: sw a2, 4(sp) +; RV32IA-TSO-NEXT: sw a1, 0(sp) +; RV32IA-TSO-NEXT: mv a1, sp +; RV32IA-TSO-NEXT: li a4, 5 +; RV32IA-TSO-NEXT: li a5, 5 +; RV32IA-TSO-NEXT: mv a2, a3 +; RV32IA-TSO-NEXT: mv a3, a6 +; RV32IA-TSO-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd a1, 0(sp) @@ -3454,16 +5299,1321 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; -; RV64IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: -; RV64IA: # %bb.0: -; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.d.aqrl a3, (a0) -; RV64IA-NEXT: bne a3, a1, .LBB39_3 -; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: sc.d.rl a4, a2, (a0) -; RV64IA-NEXT: bnez a4, .LBB39_1 -; RV64IA-NEXT: .LBB39_3: -; RV64IA-NEXT: ret +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB39_1 +; RV64IA-WMO-NEXT: .LBB39_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB39_1 +; RV64IA-TSO-NEXT: .LBB39_3: +; RV64IA-TSO-NEXT: ret +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 +; RV32IA-WMO-ZACAS-NEXT: mv a7, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a1 +; RV32IA-WMO-ZACAS-NEXT: amocas.d.aqrl a6, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst ret void } + +define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a4, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a5, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a5, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: mv a1, a4 +; RV32IA-NEXT: li a4, 0 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a4 +; RV64IA-WMO-NEXT: li a4, 0 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a4 +; RV64IA-TSO-NEXT: li a4, 0 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic + ret void +} + +define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 2 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aq a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 2 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire + ret void +} + +define void @cmpxchg_i128_release_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 3 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 3 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 3 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.rl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 3 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release monotonic + ret void +} + +define void @cmpxchg_i128_release_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 3 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 3 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 3 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 3 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val release acquire + ret void +} + +define void @cmpxchg_i128_acq_rel_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 4 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 4 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 4 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i128_acq_rel_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 4 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 4 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 4 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 4 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acq_rel acquire + ret void +} + +define void @cmpxchg_i128_seq_cst_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a6, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a6, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a6, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: li a5, 0 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a5, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a5 +; RV64IA-WMO-NEXT: li a5, 0 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a5, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a5 +; RV64IA-TSO-NEXT: li a5, 0 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i128_seq_cst_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: li a5, 2 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: li a5, 2 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: li a5, 2 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i128_seq_cst_seq_cst(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; RV32I-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a4, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw t0, 28(sp) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: li a0, 16 +; RV32I-NEXT: addi a2, sp, 24 +; RV32I-NEXT: addi a3, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 5 +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: call __atomic_compare_exchange@plt +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -48 +; RV32IA-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a0 +; RV32IA-NEXT: lw a0, 0(a2) +; RV32IA-NEXT: lw a3, 4(a2) +; RV32IA-NEXT: lw a4, 8(a2) +; RV32IA-NEXT: lw a2, 12(a2) +; RV32IA-NEXT: lw a5, 12(a1) +; RV32IA-NEXT: lw a7, 8(a1) +; RV32IA-NEXT: lw t0, 4(a1) +; RV32IA-NEXT: lw a1, 0(a1) +; RV32IA-NEXT: sw a5, 36(sp) +; RV32IA-NEXT: sw a7, 32(sp) +; RV32IA-NEXT: sw t0, 28(sp) +; RV32IA-NEXT: sw a1, 24(sp) +; RV32IA-NEXT: sw a2, 20(sp) +; RV32IA-NEXT: sw a4, 16(sp) +; RV32IA-NEXT: sw a3, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: li a0, 16 +; RV32IA-NEXT: addi a2, sp, 24 +; RV32IA-NEXT: addi a3, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 5 +; RV32IA-NEXT: mv a1, a6 +; RV32IA-NEXT: call __atomic_compare_exchange@plt +; RV32IA-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 48 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv a6, a4 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: li a5, 5 +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: call __atomic_compare_exchange_16@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: addi sp, sp, -32 +; RV64IA-WMO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-WMO-NEXT: mv a6, a4 +; RV64IA-WMO-NEXT: sd a2, 8(sp) +; RV64IA-WMO-NEXT: sd a1, 0(sp) +; RV64IA-WMO-NEXT: mv a1, sp +; RV64IA-WMO-NEXT: li a4, 5 +; RV64IA-WMO-NEXT: li a5, 5 +; RV64IA-WMO-NEXT: mv a2, a3 +; RV64IA-WMO-NEXT: mv a3, a6 +; RV64IA-WMO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-WMO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-WMO-NEXT: addi sp, sp, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: mv a5, a4 +; RV64IA-ZACAS-NEXT: mv a7, a2 +; RV64IA-ZACAS-NEXT: mv a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a1 +; RV64IA-ZACAS-NEXT: amocas.q.aqrl a6, a4, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i128_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: addi sp, sp, -32 +; RV64IA-TSO-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IA-TSO-NEXT: mv a6, a4 +; RV64IA-TSO-NEXT: sd a2, 8(sp) +; RV64IA-TSO-NEXT: sd a1, 0(sp) +; RV64IA-TSO-NEXT: mv a1, sp +; RV64IA-TSO-NEXT: li a4, 5 +; RV64IA-TSO-NEXT: li a5, 5 +; RV64IA-TSO-NEXT: mv a2, a3 +; RV64IA-TSO-NEXT: mv a3, a6 +; RV64IA-TSO-NEXT: call __atomic_compare_exchange_16@plt +; RV64IA-TSO-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IA-TSO-NEXT: addi sp, sp, 32 +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val seq_cst seq_cst + ret void +}