Index: llvm/trunk/include/llvm/IR/IntrinsicsRISCV.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsRISCV.td +++ llvm/trunk/include/llvm/IR/IntrinsicsRISCV.td @@ -36,4 +36,9 @@ def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic; +def int_riscv_masked_cmpxchg_i32 + : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrArgMemOnly, NoCapture<0>]>; + } // TargetPrefix = "riscv" Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp @@ -91,6 +91,7 @@ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); void expandPartwordCmpXchg(AtomicCmpXchgInst *I); void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); + void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); static Value *insertRMWCmpXchgLoop( @@ -944,6 +945,35 @@ AI->eraseFromParent(); } +void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { + IRBuilder<> Builder(CI); + + PartwordMaskValues PMV = createMaskInstrs( + Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); + + Value *CmpVal_Shifted = Builder.CreateShl( + Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, + "CmpVal_Shifted"); + Value *NewVal_Shifted = Builder.CreateShl( + Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, + "NewVal_Shifted"); + Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( + Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, + CI->getSuccessOrdering()); + Value *FinalOldVal = Builder.CreateTrunc( + Builder.CreateLShr(OldVal, 
PMV.ShiftAmt), PMV.ValueType); + + Value *Res = UndefValue::get(CI->getType()); + Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); + Value *Success = Builder.CreateICmpEQ( + CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); +} + Value *AtomicExpand::insertRMWLLSCLoop( IRBuilder<> &Builder, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, @@ -1366,8 +1396,8 @@ return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: - llvm_unreachable( - "MaskedIntrinsic expansion of cmpxhg not yet implemented"); + expandAtomicCmpXchgToMaskedIntrinsic(CI); + return true; } } Index: llvm/trunk/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -52,6 +52,9 @@ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp, bool IsMasked, int Width, MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpXchg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, bool IsMasked, + int Width, MachineBasicBlock::iterator &NextMBBI); }; char RISCVExpandPseudo::ID = 0; @@ -106,6 +109,10 @@ case RISCV::PseudoMaskedAtomicLoadUMin32: return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32, NextMBBI); + case RISCV::PseudoCmpXchg32: + return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI); + case RISCV::PseudoMaskedCmpXchg32: + return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI); } return false; @@ -441,6 +448,103 @@ return true; } +bool RISCVExpandPseudo::expandAtomicCmpXchg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, + int Width, MachineBasicBlock::iterator &NextMBBI) { + assert(Width == 32 && "RV64 atomic expansion currently unsupported"); + MachineInstr &MI = *MBBI; + DebugLoc DL = 
MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopHeadMBB->addSuccessor(DoneMBB); + LoopTailMBB->addSuccessor(DoneMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned ScratchReg = MI.getOperand(1).getReg(); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned CmpValReg = MI.getOperand(3).getReg(); + unsigned NewValReg = MI.getOperand(4).getReg(); + AtomicOrdering Ordering = + static_cast(MI.getOperand(IsMasked ? 
6 : 5).getImm()); + + if (!IsMasked) { + // .loophead: + // lr.w dest, (addr) + // bne dest, cmpval, done + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + .addReg(AddrReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE)) + .addReg(DestReg) + .addReg(CmpValReg) + .addMBB(DoneMBB); + // .looptail: + // sc.w scratch, newval, (addr) + // bnez scratch, loophead + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + .addReg(AddrReg) + .addReg(NewValReg); + BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopHeadMBB); + } else { + // .loophead: + // lr.w dest, (addr) + // and scratch, dest, mask + // bne scratch, cmpval, done + unsigned MaskReg = MI.getOperand(5).getReg(); + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + .addReg(AddrReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(CmpValReg) + .addMBB(DoneMBB); + + // .looptail: + // xor scratch, dest, newval + // and scratch, scratch, mask + // xor scratch, dest, scratch + // sc.w scratch, scratch, (adrr) + // bnez scratch, loophead + insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg, + MaskReg, ScratchReg); + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + .addReg(AddrReg) + .addReg(ScratchReg); + BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopHeadMBB); + } + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo", Index: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h 
=================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h +++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h @@ -126,6 +126,13 @@ virtual Value *emitMaskedAtomicRMWIntrinsic( IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override; + TargetLowering::AtomicExpansionKind + shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; + virtual Value * + emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder, AtomicCmpXchgInst *CI, + Value *AlignedAddr, Value *CmpVal, + Value *NewVal, Value *Mask, + AtomicOrdering Ord) const override; }; } Index: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp @@ -186,6 +186,7 @@ case Intrinsic::riscv_masked_atomicrmw_min_i32: case Intrinsic::riscv_masked_atomicrmw_umax_i32: case Intrinsic::riscv_masked_atomicrmw_umin_i32: + case Intrinsic::riscv_masked_cmpxchg_i32: PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); @@ -1708,3 +1709,23 @@ return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); } + +TargetLowering::AtomicExpansionKind +RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( + AtomicCmpXchgInst *CI) const { + unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; +} + +Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, + Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { + Value *Ordering = Builder.getInt32(static_cast(Ord)); + Type *Tys[] = 
{AlignedAddr->getType()}; + Function *MaskedCmpXchg = Intrinsic::getDeclaration( + CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys); + return Builder.CreateCall(MaskedCmpXchg, + {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); +} Index: llvm/trunk/lib/Target/RISCV/RISCVInstrInfoA.td =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVInstrInfoA.td +++ llvm/trunk/lib/Target/RISCV/RISCVInstrInfoA.td @@ -153,7 +153,7 @@ } def PseudoAtomicLoadNand32 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in +// Ordering constants must be kept in sync with the AtomicOrdering enum in // AtomicOrdering.h. def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr), (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; @@ -230,4 +230,49 @@ def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat; + +/// Compare and exchange + +class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} + +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. 
multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst> { + def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; + def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; + def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; + def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; + def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; +}
RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB0_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB0_1 +; RV32IA-NEXT: .LBB0_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ret void } @@ -32,6 +58,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB1_1 +; RV32IA-NEXT: .LBB1_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire monotonic ret void } @@ -49,6 +99,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; 
RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB2_1 +; RV32IA-NEXT: .LBB2_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ret void } @@ -66,6 +140,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB3_1 +; RV32IA-NEXT: .LBB3_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release monotonic ret void } @@ -83,6 +181,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; 
RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB4_1 +; RV32IA-NEXT: .LBB4_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire ret void } @@ -100,6 +222,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB5_1 +; RV32IA-NEXT: .LBB5_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acq_rel monotonic ret void } @@ -117,6 +263,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; 
RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB6_1 +; RV32IA-NEXT: .LBB6_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acq_rel acquire ret void } @@ -134,6 +304,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB7_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB7_1 +; RV32IA-NEXT: .LBB7_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val seq_cst monotonic ret void } @@ -151,6 +345,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB8_1: # =>This 
Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB8_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB8_1 +; RV32IA-NEXT: .LBB8_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val seq_cst acquire ret void } @@ -168,6 +386,30 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a3, a3, 24 +; RV32IA-NEXT: addi a4, zero, 255 +; RV32IA-NEXT: sll a4, a4, a3 +; RV32IA-NEXT: andi a2, a2, 255 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 +; RV32IA-NEXT: bne a5, a1, .LBB9_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; RV32IA-NEXT: xor a5, a3, a2 +; RV32IA-NEXT: and a5, a5, a4 +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB9_1 +; RV32IA-NEXT: .LBB9_3: +; RV32IA-NEXT: ret %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val seq_cst seq_cst ret void } @@ -185,6 +427,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a4, (a0) +; 
RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB10_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB10_1 +; RV32IA-NEXT: .LBB10_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ret void } @@ -202,6 +469,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB11_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB11_1 +; RV32IA-NEXT: .LBB11_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire monotonic ret void } @@ -219,6 +511,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; 
RV32IA-NEXT: bne a5, a1, .LBB12_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB12_1 +; RV32IA-NEXT: .LBB12_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ret void } @@ -236,6 +553,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB13_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB13_1 +; RV32IA-NEXT: .LBB13_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic ret void } @@ -253,6 +595,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB14_3 +; 
RV32IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB14_1 +; RV32IA-NEXT: .LBB14_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire ret void } @@ -270,6 +637,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB15_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB15_1 +; RV32IA-NEXT: .LBB15_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acq_rel monotonic ret void } @@ -287,6 +679,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB16_3 +; RV32IA-NEXT: # %bb.2: # in Loop: 
Header=BB16_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB16_1 +; RV32IA-NEXT: .LBB16_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acq_rel acquire ret void } @@ -304,6 +721,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB17_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB17_1 +; RV32IA-NEXT: .LBB17_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst monotonic ret void } @@ -321,6 +763,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB18_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; 
RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB18_1 +; RV32IA-NEXT: .LBB18_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst acquire ret void } @@ -338,6 +805,31 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a4, a4, 24 +; RV32IA-NEXT: sll a3, a3, a4 +; RV32IA-NEXT: sll a2, a2, a4 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 +; RV32IA-NEXT: bne a5, a1, .LBB19_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; RV32IA-NEXT: xor a5, a4, a2 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) +; RV32IA-NEXT: bnez a5, .LBB19_1 +; RV32IA-NEXT: .LBB19_3: +; RV32IA-NEXT: ret %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val seq_cst seq_cst ret void } @@ -355,6 +847,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-NEXT: sc.w a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB20_1 +; RV32IA-NEXT: .LBB20_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ret void } @@ -372,6 +875,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: 
.LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB21_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32IA-NEXT: sc.w a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB21_1 +; RV32IA-NEXT: .LBB21_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire monotonic ret void } @@ -389,6 +903,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB22_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32IA-NEXT: sc.w a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB22_1 +; RV32IA-NEXT: .LBB22_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ret void } @@ -406,6 +931,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB23_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB23_1 +; RV32IA-NEXT: .LBB23_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release monotonic ret void } @@ -423,6 +959,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB24_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB24_1 +; RV32IA-NEXT: .LBB24_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire ret void } @@ 
-440,6 +987,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB25_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB25_1 +; RV32IA-NEXT: .LBB25_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acq_rel monotonic ret void } @@ -457,6 +1015,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aq a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB26_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV32IA-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB26_1 +; RV32IA-NEXT: .LBB26_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acq_rel acquire ret void } @@ -474,6 +1043,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-NEXT: sc.w.aqrl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB27_1 +; RV32IA-NEXT: .LBB27_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val seq_cst monotonic ret void } @@ -491,6 +1071,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB28_1 
Depth=1 +; RV32IA-NEXT: sc.w.aqrl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB28_1 +; RV32IA-NEXT: .LBB28_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val seq_cst acquire ret void } @@ -508,6 +1099,17 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-NEXT: sc.w.aqrl a4, a2, (a0) +; RV32IA-NEXT: bnez a4, .LBB29_1 +; RV32IA-NEXT: .LBB29_3: +; RV32IA-NEXT: ret %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val seq_cst seq_cst ret void } @@ -528,6 +1130,22 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: mv a4, zero +; RV32IA-NEXT: mv a5, zero +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ret void } @@ -549,6 +1167,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a5, zero +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 
%cmp, i64 %val acquire monotonic ret void } @@ -569,6 +1204,22 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: mv a4, a5 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ret void } @@ -590,6 +1241,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a5, zero +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release monotonic ret void } @@ -611,6 +1279,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 3 +; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire ret void } @@ 
-632,6 +1317,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 4 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a5, zero +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acq_rel monotonic ret void } @@ -653,6 +1355,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 4 +; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acq_rel acquire ret void } @@ -674,6 +1393,23 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a5, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 5 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a5, zero +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val seq_cst monotonic ret void } @@ -695,6 +1431,23 @@ 
; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: mv a6, a4 +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a4, zero, 5 +; RV32IA-NEXT: addi a5, zero, 2 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val seq_cst acquire ret void } @@ -715,6 +1468,22 @@ ; RV32I-NEXT: lw ra, 12(sp) ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: addi a5, zero, 5 +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: mv a4, a5 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val seq_cst seq_cst ret void }