diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt
--- a/llvm/include/llvm/IR/CMakeLists.txt
+++ b/llvm/include/llvm/IR/CMakeLists.txt
@@ -10,6 +10,7 @@
 tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf)
 tablegen(LLVM IntrinsicsDirectX.h -gen-intrinsic-enums -intrinsic-prefix=dx)
 tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon)
+tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch)
 tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips)
 tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm)
 tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2068,3 +2068,4 @@
 include "llvm/IR/IntrinsicsSPIRV.td"
 include "llvm/IR/IntrinsicsVE.td"
 include "llvm/IR/IntrinsicsDirectX.td"
+include "llvm/IR/IntrinsicsLoongArch.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -0,0 +1,31 @@
+//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the LoongArch-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "loongarch" in {
+
+//===----------------------------------------------------------------------===//
+// Atomics
+
+// T @llvm.<name>.T.<p>(any*, T, T, T imm);
+class MaskedAtomicRMW<LLVMType itype>
+    : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype],
+                [IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<3>>]>;
+
+// We define 32-bit and 64-bit variants of the above, where T stands for i32
+// or i64 respectively:
+multiclass MaskedAtomicRMWIntrinsics {
+  // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
+  def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
+}
+
+defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
+} // TargetPrefix = "loongarch"
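The four intrinsic operands mirror what the target-independent AtomicExpandPass hands to a target when it widens an i8/i16 atomicrmw to a 32-bit word: a word-aligned pointer, the increment value and the lane mask already shifted into position, and the atomic ordering as an immediate. A standalone C++ sketch of that widening arithmetic for a little-endian target (hypothetical helper names, for illustration only; the real computation lives in AtomicExpandPass):

    #include <cstdint>

    // Illustration only: how a byte/halfword address is turned into the
    // (aligned word, shift, mask) triple that the masked intrinsic consumes.
    struct MaskedOperands {
      uintptr_t AlignedAddr; // Addr & ~3: the 32-bit word containing the lane
      unsigned ShiftAmt;     // (Addr & 3) * 8: bit offset of the lane
      uint32_t Mask;         // 0xff or 0xffff shifted into position
    };

    MaskedOperands computeMaskedOperands(uintptr_t Addr, unsigned BitWidth) {
      MaskedOperands MO;
      MO.AlignedAddr = Addr & ~uintptr_t(3);
      MO.ShiftAmt = unsigned(Addr & 3) * 8;
      MO.Mask = uint32_t((uint64_t(1) << BitWidth) - 1) << MO.ShiftAmt;
      return MO;
    }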

diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt
--- a/llvm/lib/Target/LoongArch/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt
@@ -16,6 +16,7 @@
 add_llvm_target(LoongArchCodeGen
   LoongArchAsmPrinter.cpp
+  LoongArchExpandAtomicPseudoInsts.cpp
   LoongArchFrameLowering.cpp
   LoongArchInstrInfo.cpp
   LoongArchISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -25,6 +25,7 @@
 class MCOperand;
 class MachineInstr;
 class MachineOperand;
+class PassRegistry;
 
 bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                         AsmPrinter &AP);
@@ -33,6 +34,8 @@
                                const AsmPrinter &AP);
 
 FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
+FunctionPass *createLoongArchExpandAtomicPseudoPass();
+void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
 
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
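The pass declared above and implemented in the new file below turns PseudoMaskedAtomicSwap32 into a dbar/ll.w/sc.w retry loop that updates only the selected byte or halfword lane of the containing word. As a rough reference model of the semantics that loop provides, here is a plain C++ word-wide compare-and-swap loop for a byte exchange (illustration only, not LLVM code):

    #include <atomic>
    #include <cstdint>

    // Reference model: exchange one byte lane of a 32-bit word atomically,
    // leaving the other lanes untouched.
    uint8_t exchangeByteViaWord(std::atomic<uint32_t> &Word, unsigned ShiftAmt,
                                uint8_t NewVal) {
      uint32_t Mask = uint32_t(0xff) << ShiftAmt;
      uint32_t Incr = uint32_t(NewVal) << ShiftAmt;
      uint32_t Old = Word.load(std::memory_order_relaxed);
      uint32_t Merged;
      do {
        // Same merge the expansion uses: old ^ ((old ^ incr) & mask).
        Merged = Old ^ ((Old ^ Incr) & Mask);
      } while (!Word.compare_exchange_weak(Old, Merged,
                                           std::memory_order_acq_rel,
                                           std::memory_order_relaxed));
      return uint8_t((Old & Mask) >> ShiftAmt);
    }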
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -0,0 +1,207 @@
+//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands atomic pseudo instructions into
+// target instructions. This pass should be run at the last possible moment,
+// avoiding the possibility for other passes to break the requirements for
+// forward progress in the LL/SC block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
+  "LoongArch atomic pseudo instruction expansion pass"
+
+namespace {
+
+class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
+public:
+  const LoongArchInstrInfo *TII;
+  static char ID;
+
+  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
+    initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
+  }
+
+private:
+  bool expandMBB(MachineBasicBlock &MBB);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicBinOp(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+                         bool IsMasked, int Width,
+                         MachineBasicBlock::iterator &NextMBBI);
+};
+
+char LoongArchExpandAtomicPseudo::ID = 0;
+
+bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const LoongArchInstrInfo *>(
+      MF.getSubtarget().getInstrInfo());
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+  return Modified;
+}
+
+bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool LoongArchExpandAtomicPseudo::expandMI(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  switch (MBBI->getOpcode()) {
+  case LoongArch::PseudoMaskedAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+                             NextMBBI);
+  }
+  return false;
+}
+
+static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
+                              MachineBasicBlock *MBB, Register DestReg,
+                              Register OldValReg, Register NewValReg,
+                              Register MaskReg, Register ScratchReg) {
+  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+  // res = oldval ^ ((oldval ^ newval) & masktargetdata);
+  BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
+      .addReg(OldValReg)
+      .addReg(NewValReg);
+  BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(MaskReg);
+  BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
+      .addReg(OldValReg)
+      .addReg(ScratchReg);
+}
+
+static void doMaskedAtomicBinOpExpansion(
+    const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register IncrReg = MI.getOperand(3).getReg();
+  Register MaskReg = MI.getOperand(4).getReg();
+
+  // .loop:
+  //   dbar 0
+  //   ll.w destreg, (alignedaddr)
+  //   binop scratch, destreg, incr
+  //   xor scratch, destreg, scratch
+  //   and scratch, scratch, masktargetdata
+  //   xor scratch, destreg, scratch
+  //   sc.w scratch, scratch, (alignedaddr)
+  //   beq scratch, zero, loop
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
+        .addReg(IncrReg)
+        .addImm(0);
+    break;
+    // TODO: support other AtomicRMWInst.
+  }
+
+  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+                    ScratchReg);
+
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+      .addReg(ScratchReg)
+      .addReg(LoongArch::R0)
+      .addMBB(LoopMBB);
+}
+
+bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopMBB);
+  MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopMBB);
+
+  if (IsMasked)
+    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+                                 Width);
+  // TODO: support IsMasked = false.
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
+} // end namespace
+
+INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
+                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
+
+namespace llvm {
+
+FunctionPass *createLoongArchExpandAtomicPseudoPass() {
+  return new LoongArchExpandAtomicPseudo();
+}
+
+} // end namespace llvm
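The insertMaskedMerge helper above combines old and new values with three ALU instructions using the identity res = oldval ^ ((oldval ^ newval) & mask), which takes newval's bits inside the mask and keeps oldval's bits outside it. A small self-contained C++ check of that identity (illustration only):

    #include <cassert>
    #include <cstdint>

    uint32_t maskedMerge(uint32_t OldVal, uint32_t NewVal, uint32_t Mask) {
      return OldVal ^ ((OldVal ^ NewVal) & Mask);
    }

    int main() {
      // The new value lands inside the mask; old bits survive outside it.
      assert(maskedMerge(0x11223344, 0x000000aa, 0x000000ff) == 0x112233aa);
      assert(maskedMerge(0x11223344, 0x0000bb00, 0x0000ff00) == 0x1122bb44);
      return 0;
    }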
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -99,6 +99,13 @@
   bool isCheapToSpeculateCttz(Type *Ty) const override;
   bool isCheapToSpeculateCtlz(Type *Ty) const override;
   bool hasAndNot(SDValue Y) const override;
+  TargetLowering::AtomicExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
+                                      Value *AlignedAddr, Value *Incr,
+                                      Value *Mask, Value *ShiftAmt,
+                                      AtomicOrdering Ord) const override;
 
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
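These two overrides are callbacks for the target-independent AtomicExpandPass: shouldExpandAtomicRMWInIR requests the MaskedIntrinsic strategy for i8/i16 operations, and emitMaskedAtomicRMWIntrinsic is then called back to build the masked intrinsic call. A simplified paraphrase of how that driver consumes the hooks (not the actual AtomicExpandPass code, which also computes AlignedAddr, Incr, Mask and ShiftAmt itself):

    #include <cassert>
    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Sketch: once the target asks for MaskedIntrinsic expansion, the generic
    // pass lets the target emit the intrinsic call, then shifts and truncates
    // the returned word back to the original value type.
    Value *expandViaMaskedIntrinsic(const TargetLowering &TLI,
                                    IRBuilderBase &Builder, AtomicRMWInst *AI,
                                    Value *AlignedAddr, Value *Incr,
                                    Value *Mask, Value *ShiftAmt) {
      assert(TLI.shouldExpandAtomicRMWInIR(AI) ==
             TargetLowering::AtomicExpansionKind::MaskedIntrinsic);
      Value *OldWord = TLI.emitMaskedAtomicRMWIntrinsic(
          Builder, AI, AlignedAddr, Incr, Mask, ShiftAmt, AI->getOrdering());
      Value *Shifted = Builder.CreateLShr(OldWord, ShiftAmt);
      return Builder.CreateTrunc(Shifted, AI->getType());
    }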
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -21,6 +21,8 @@
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
@@ -137,6 +139,8 @@
 
   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
 
+  setMinCmpXchgSizeInBits(32);
+
   // Function alignments.
   const Align FunctionAlignment(4);
   setMinFunctionAlignment(FunctionAlignment);
@@ -1779,3 +1783,55 @@
   // TODO: Support vectors.
   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
 }
+
+TargetLowering::AtomicExpansionKind
+LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // TODO: Add more AtomicRMWInst that need to be extended.
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  if (Size == 8 || Size == 16)
+    return AtomicExpansionKind::MaskedIntrinsic;
+  return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
+                                    AtomicRMWInst::BinOp BinOp) {
+  if (GRLen == 64) {
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unexpected AtomicRMW BinOp");
+    case AtomicRMWInst::Xchg:
+      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
+      // TODO: support other AtomicRMWInst.
+    }
+  }
+
+  llvm_unreachable("Unexpected GRLen\n");
+}
+
+Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
+    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+  unsigned GRLen = Subtarget.getGRLen();
+  Value *Ordering =
+      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
+  Type *Tys[] = {AlignedAddr->getType()};
+  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
+      AI->getModule(),
+      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
+
+  if (GRLen == 64) {
+    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
+  }
+
+  Value *Result;
+
+  Result =
+      Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+
+  if (GRLen == 64)
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  return Result;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -83,6 +83,9 @@
     : ImmAsmOperand<"U", width, suffix> {
 }
 
+// A parameterized register class alternative to i32imm/i64imm from Target.td.
+def grlenimm : Operand<GRLenVT>;
+
 def uimm2 : Operand<GRLenVT> {
   let ParserMatchClass = UImmAsmOperand<2>;
 }
@@ -1083,6 +1086,32 @@
 defm : StPat<store, ST_D, GPR, i64>;
 } // Predicates = [IsLA64]
 
+/// Atomic Ops
+
+class PseudoMaskedAM
+    : Pseudo<(outs GPR:$res, GPR:$scratch),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAM;
+
+class AtomicPat<Intrinsic intrin, Pseudo AMInst>
+    : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
+          (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
+
+let Predicates = [IsLA64] in {
+def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
+                PseudoMaskedAtomicSwap32>;
+def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
+          (AMSWAP_DB_W GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr),
+          (AMSWAP_DB_D GPR:$incr, GPR:$addr)>;
+} // Predicates = [IsLA64]
+
 /// Other pseudo-instructions
 
 // Pessimistically assume the stack pointer will be clobbered
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -102,6 +102,7 @@
 
   void addIRPasses() override;
   bool addInstSelector() override;
+  void addPreEmitPass2() override;
 };
 } // end namespace
 
@@ -121,3 +122,10 @@
 
   return false;
 }
+
+void LoongArchPassConfig::addPreEmitPass2() {
+  // Schedule the expansion of AtomicPseudos at the last possible moment,
+  // avoiding the possibility for other passes to break the requirements for
+  // forward progress in the LL/SC block.
+  addPass(createLoongArchExpandAtomicPseudoPass());
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+;; TODO: The atomicrmw xchg operation on LA32 will be added later.
+
+define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    addi.w $a5, $a1, 0
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB0_1
+; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    addi.w $a5, $a1, 0
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB1_1
+; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.w $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_xchg_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.d $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i64 %b acquire
+  ret i64 %1
+}
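For context, the i8 case checked above corresponds to ordinary source code such as the following hypothetical reproducer; compiled for loongarch64, the byte exchange lowers to the masked LL/SC loop shown in the test, while the i32/i64 exchanges map directly to amswap_db.w/amswap_db.d:

    #include <atomic>
    #include <cstdint>

    // A plain byte exchange; sub-word atomics are the cases that need the
    // masked LL/SC expansion on LoongArch.
    uint8_t exchange_byte(std::atomic<uint8_t> &A, uint8_t V) {
      return A.exchange(V, std::memory_order_acquire);
    }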