diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -29,10 +29,20 @@
   def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
 }
 
+multiclass MaskedAtomicRMWFiveOpIntrinsics {
+  // TODO: Support cmpxchg on LA32.
+  // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
+  def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
+}
+
 defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
+
+// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+//   ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
+defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -55,6 +55,9 @@
                             MachineBasicBlock::iterator MBBI,
                             AtomicRMWInst::BinOp, bool IsMasked, int Width,
                             MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, bool IsMasked,
+                           int Width, MachineBasicBlock::iterator &NextMBBI);
 };
 
 char LoongArchExpandAtomicPseudo::ID = 0;
@@ -124,6 +127,12 @@
   case LoongArch::PseudoMaskedAtomicLoadUMin32:
     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                 NextMBBI);
+  case LoongArch::PseudoCmpXchg32:
+    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+  case LoongArch::PseudoCmpXchg64:
+    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
+  case LoongArch::PseudoMaskedCmpXchg32:
+    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
   }
   return false;
 }
@@ -432,6 +441,131 @@
   return true;
 }
 
+bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+    int Width, MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
+  MF->insert(++TailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopHeadMBB->addSuccessor(TailMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  TailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register CmpValReg = MI.getOperand(3).getReg();
+  Register NewValReg = MI.getOperand(4).getReg();
+
+  if (!IsMasked) {
+    // .loophead:
+    //   ll.[w|d] dest, (addr)
+    //   bne dest, cmpval, tail
+    BuildMI(LoopHeadMBB, DL,
+            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
+        .addReg(AddrReg)
+        .addImm(0);
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+        .addReg(DestReg)
+        .addReg(CmpValReg)
+        .addMBB(TailMBB);
+    // .looptail:
+    //   dbar 0
+    //   move scratch, newval
+    //   sc.[w|d] scratch, scratch, (addr)
+    //   beqz scratch, loophead
+    //   b done
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+        .addReg(NewValReg)
+        .addReg(LoongArch::R0);
+    BuildMI(LoopTailMBB, DL,
+            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
+            ScratchReg)
+        .addReg(ScratchReg)
+        .addReg(AddrReg)
+        .addImm(0);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+        .addReg(ScratchReg)
+        .addMBB(LoopHeadMBB);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
+  } else {
+    // .loophead:
+    //   ll.[w|d] dest, (addr)
+    //   and scratch, dest, mask
+    //   bne scratch, cmpval, tail
+    Register MaskReg = MI.getOperand(5).getReg();
+    BuildMI(LoopHeadMBB, DL,
+            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
+        .addReg(AddrReg)
+        .addImm(0);
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
+        .addReg(DestReg)
+        .addReg(MaskReg);
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+        .addReg(ScratchReg)
+        .addReg(CmpValReg)
+        .addMBB(TailMBB);
+
+    // .looptail:
+    //   dbar 0
+    //   andn scratch, dest, mask
+    //   or scratch, scratch, newval
+    //   sc.[w|d] scratch, scratch, (addr)
+    //   beqz scratch, loophead
+    //   b done
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
+        .addReg(DestReg)
+        .addReg(MaskReg);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+        .addReg(ScratchReg)
+        .addReg(NewValReg);
+    BuildMI(LoopTailMBB, DL,
+            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
+            ScratchReg)
+        .addReg(ScratchReg)
+        .addReg(AddrReg)
+        .addImm(0);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+        .addReg(ScratchReg)
+        .addMBB(LoopHeadMBB);
+    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
+  }
+
+  // .tail:
+  //   dbar 0x700
+  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+  computeAndAddLiveIns(LiveRegs, *TailMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
 } // end namespace
 
 INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -109,6 +109,13 @@
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
+  TargetLowering::AtomicExpansionKind
+  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
+                                          AtomicCmpXchgInst *CI,
+                                          Value *AlignedAddr, Value *CmpVal,
+                                          Value *NewVal, Value *Mask,
+                                          AtomicOrdering Ord) const override;
   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
@@ -123,6 +130,10 @@
   Register
   getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
 
+  ISD::NodeType getExtendForAtomicOps() const override {
+    return ISD::SIGN_EXTEND;
+  }
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2084,6 +2084,35 @@
   llvm_unreachable("Unexpected GRLen\n");
 }
 
+TargetLowering::AtomicExpansionKind
+LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *CI) const {
+  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
+  if (Size == 8 || Size == 16)
+    return AtomicExpansionKind::MaskedIntrinsic;
+  return AtomicExpansionKind::None;
+}
+
+Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
+    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+  Value *Ordering =
+      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
+
+  // TODO: Support cmpxchg on LA32.
+  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+  Type *Tys[] = {AlignedAddr->getType()};
+  Function *MaskedCmpXchg =
+      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
+  Value *Result = Builder.CreateCall(
+      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  return Result;
+}
+
 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1180,6 +1180,30 @@
 def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
 def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
 
+/// Compare and exchange
+
+class PseudoCmpXchg
+    : Pseudo<(outs GPR:$res, GPR:$scratch),
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoCmpXchg32 : PseudoCmpXchg;
+def PseudoCmpXchg64 : PseudoCmpXchg;
+
+def PseudoMaskedCmpXchg32
+    : Pseudo<(outs GPR:$res, GPR:$scratch),
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
+              grlenimm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
           (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1233,6 +1257,15 @@
                 PseudoMaskedAtomicLoadUMax32>;
 def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;
+
+def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
+          (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+def : Pat<(int_loongarch_masked_cmpxchg_i64
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+          (PseudoMaskedCmpXchg32
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
+def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
+          (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
 } // Predicates = [IsLA64]
 
 def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a0
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a0, $a4, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a3, 0
+; LA64-NEXT:    and $a5, $a4, $a0
+; LA64-NEXT:    bne $a5, $a1, .LBB0_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a5, $a4, $a0
+; LA64-NEXT:    or $a5, $a5, $a2
+; LA64-NEXT:    sc.w $a5, $a3, 0
+; LA64-NEXT:    beqz $a5, .LBB0_1
+; LA64-NEXT:    b .LBB0_4
+; LA64-NEXT:  .LBB0_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB0_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+  ret void
+}
+
+define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a0, $a4, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a3, 0
+; LA64-NEXT:    and $a5, $a4, $a0
+; LA64-NEXT:    bne $a5, $a1, .LBB1_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a5, $a4, $a0
+; LA64-NEXT:    or $a5, $a5, $a2
+; LA64-NEXT:    sc.w $a5, $a3, 0
+; LA64-NEXT:    beqz $a5, .LBB1_1
+; LA64-NEXT:    b .LBB1_4
+; LA64-NEXT:  .LBB1_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB1_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+  ret void
+}
+
+define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB2_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB2_1
+; LA64-NEXT:    b .LBB2_4
+; LA64-NEXT:  .LBB2_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB2_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+  ret void
+}
+
+define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB3_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB3_1
+; LA64-NEXT:    b .LBB3_4
+; LA64-NEXT:  .LBB3_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB3_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+  ret void
+}
+
+define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a3, 0
+; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    bne $a6, $a1, .LBB4_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a3, 0
+; LA64-NEXT:    beqz $a6, .LBB4_1
+; LA64-NEXT:    b .LBB4_4
+; LA64-NEXT:  .LBB4_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB4_4:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+  %res = extractvalue { i8, i1 } %tmp, 0
+  ret i8 %res
+}
+
+define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a3, 0
+; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    bne $a6, $a1, .LBB5_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a3, 0
+; LA64-NEXT:    beqz $a6, .LBB5_1
+; LA64-NEXT:    b .LBB5_4
+; LA64-NEXT:  .LBB5_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB5_4:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+  %res = extractvalue { i16, i1 } %tmp, 0
+  ret i16 %res
+}
+
+define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB6_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB6_1
+; LA64-NEXT:    b .LBB6_4
+; LA64-NEXT:  .LBB6_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB6_4:
+; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+  %res = extractvalue { i32, i1 } %tmp, 0
+  ret i32 %res
+}
+
+define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB7_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB7_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB7_1
+; LA64-NEXT:    b .LBB7_4
+; LA64-NEXT:  .LBB7_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB7_4:
+; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+  %res = extractvalue { i64, i1 } %tmp, 0
+  ret i64 %res
+}
+
+define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a0, $a2, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    addi.w $a2, $a4, 0
+; LA64-NEXT:    addi.w $a5, $a1, 0
+; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a6, $a3, 0
+; LA64-NEXT:    and $a7, $a6, $a2
+; LA64-NEXT:    bne $a7, $a5, .LBB8_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a7, $a6, $a2
+; LA64-NEXT:    or $a7, $a7, $a0
+; LA64-NEXT:    sc.w $a7, $a3, 0
+; LA64-NEXT:    beqz $a7, .LBB8_1
+; LA64-NEXT:    b .LBB8_4
+; LA64-NEXT:  .LBB8_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB8_4:
+; LA64-NEXT:    and $a0, $a6, $a4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
+; LA64-NEXT:    xor $a0, $a1, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+  %res = extractvalue { i8, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $zero, -4
+; LA64-NEXT:    and $a3, $a0, $a3
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a0, $a2, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    addi.w $a2, $a4, 0
+; LA64-NEXT:    addi.w $a5, $a1, 0
+; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a6, $a3, 0
+; LA64-NEXT:    and $a7, $a6, $a2
+; LA64-NEXT:    bne $a7, $a5, .LBB9_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    andn $a7, $a6, $a2
+; LA64-NEXT:    or $a7, $a7, $a0
+; LA64-NEXT:    sc.w $a7, $a3, 0
+; LA64-NEXT:    beqz $a7, .LBB9_1
+; LA64-NEXT:    b .LBB9_4
+; LA64-NEXT:  .LBB9_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB9_4:
+; LA64-NEXT:    and $a0, $a6, $a4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 0
+; LA64-NEXT:    xor $a0, $a1, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+  %res = extractvalue { i16, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB10_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB10_1
+; LA64-NEXT:    b .LBB10_4
+; LA64-NEXT:  .LBB10_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB10_4:
+; LA64-NEXT:    addi.w $a0, $a1, 0
+; LA64-NEXT:    xor $a0, $a3, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+  %res = extractvalue { i32, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB11_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB11_1
+; LA64-NEXT:    b .LBB11_4
+; LA64-NEXT:  .LBB11_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB11_4:
+; LA64-NEXT:    xor $a0, $a3, $a1
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+  %res = extractvalue { i64, i1 } %tmp, 1
+  ret i1 %res
+}
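
Illustrative note (not part of the patch): with shouldExpandAtomicCmpXchgInIR returning MaskedIntrinsic for i8/i16, the generic AtomicExpand pass rewrites a narrow cmpxchg into a call to the new intrinsic on the containing aligned 32-bit word. A minimal LLVM IR sketch of that call shape follows; the value names, the .p0 overload suffix, and the constant ordering operand (4 corresponds to acquire in LLVM's AtomicOrdering enumeration) are assumptions chosen for readability, not output copied from the compiler.

; Sketch only: shape of the masked cmpxchg call emitted for an i8 field that
; lives inside the 32-bit word at %aligned; all operand values are assumed.
declare i64 @llvm.loongarch.masked.cmpxchg.i64.p0(ptr, i64, i64, i64, i64 immarg)

define i8 @masked_cmpxchg_i8_sketch(ptr %aligned, i64 %cmp64, i64 %new64,
                                    i64 %mask, i64 %shamt) {
  ; %cmp64/%new64 are the sign-extended compare and new values already shifted
  ; into position, %mask selects the i8 field, and i64 4 is the ordering.
  %old = call i64 @llvm.loongarch.masked.cmpxchg.i64.p0(ptr %aligned, i64 %cmp64,
                                                        i64 %new64, i64 %mask, i64 4)
  ; Recover the old i8 value from the returned word.
  %shifted = lshr i64 %old, %shamt
  %res = trunc i64 %shifted to i8
  ret i8 %res
}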