diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -23,6 +23,8 @@
 // We define 32-bit and 64-bit variants of the above, where T stands for i32
 // or i64 respectively:
 multiclass MaskedAtomicRMWIntrinsics {
+  // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
+  def _i32 : MaskedAtomicRMW<llvm_i32_ty>;
   // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
   def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -84,10 +84,53 @@
   case LoongArch::PseudoMaskedAtomicSwap32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                              NextMBBI);
+  case LoongArch::PseudoAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+                             NextMBBI);
   }
   return false;
 }
 
+static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
+                                   MachineInstr &MI, DebugLoc DL,
+                                   MachineBasicBlock *ThisMBB,
+                                   MachineBasicBlock *LoopMBB,
+                                   MachineBasicBlock *DoneMBB,
+                                   AtomicRMWInst::BinOp BinOp, int Width) {
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register IncrReg = MI.getOperand(3).getReg();
+
+  // .loop:
+  //   dbar 0
+  //   ll.w dest, (addr)
+  //   binop scratch, dest, val
+  //   sc.w scratch, scratch, (addr)
+  //   beq scratch, zero, loop
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+        .addReg(IncrReg)
+        .addReg(LoongArch::R0);
+    break;
+  }
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+      .addReg(ScratchReg)
+      .addReg(LoongArch::R0)
+      .addMBB(LoopMBB);
+}
+
 static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
                               MachineBasicBlock *MBB, Register DestReg,
                               Register OldValReg, Register NewValReg,
@@ -181,7 +224,8 @@
   if (IsMasked)
     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                  Width);
-  // TODO: support IsMasked = false.
+  else
+    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -107,6 +107,10 @@
                                       Value *Mask, Value *ShiftAmt,
                                       AtomicOrdering Ord) const override;
 
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+                          MachineFunction &MF,
+                          unsigned Intrinsic) const override;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1778,6 +1778,26 @@
   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
 }
 
+bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                                 const CallInst &I,
+                                                 MachineFunction &MF,
+                                                 unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  default:
+    return false;
+  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = Align(4);
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+                 MachineMemOperand::MOVolatile;
+    return true;
+    // TODO: Add more Intrinsics later.
+  }
+}
+
 TargetLowering::AtomicExpansionKind
 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // TODO: Add more AtomicRMWInst that needs to be extended.
@@ -1800,6 +1820,16 @@
     }
   }
 
+  if (GRLen == 32) {
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unexpected AtomicRMW BinOp");
+    case AtomicRMWInst::Xchg:
+      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
+      // TODO: support other AtomicRMWInst.
+    }
+  }
+
   llvm_unreachable("Unexpected GRLen\n");
 }
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1070,6 +1070,16 @@
 def PseudoMaskedAtomicSwap32 : PseudoMaskedAM;
 
+class PseudoAM : Pseudo<(outs GPR:$res, GPR:$scratch),
+                        (ins GPR:$addr, GPR:$incr), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoAtomicSwap32 : PseudoAM;
+
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
           (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1083,6 +1093,13 @@
           (AMSWAP_DB_D GPR:$incr, GPR:$addr)>;
 } // Predicates = [IsLA64]
 
+let Predicates = [IsLA32] in {
+def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i32,
+                PseudoMaskedAtomicSwap32>;
+def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicSwap32 GPR:$incr, GPR:$addr)>;
+} // Predicates = [IsLA32]
+
 /// Other pseudo-instructions
 
 // Pessimistically assume the stack pointer will be clobbered
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -1,9 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
 
-;;TODO:The atomicrmw xchg operation on LA32 will be added later
-
 define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA32-LABEL: atomicrmw_xchg_i8_acquire:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $a2, $zero, -4
+; LA32-NEXT:    and $a2, $a0, $a2
+; LA32-NEXT:    slli.w $a0, $a0, 3
+; LA32-NEXT:    ori $a3, $zero, 255
+; LA32-NEXT:    sll.w $a3, $a3, $a0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a0
+; LA32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ll.w $a4, $a2, 0
+; LA32-NEXT:    addi.w $a5, $a1, 0
+; LA32-NEXT:    xor $a5, $a4, $a5
+; LA32-NEXT:    and $a5, $a5, $a3
+; LA32-NEXT:    xor $a5, $a4, $a5
+; LA32-NEXT:    sc.w $a5, $a2, 0
+; LA32-NEXT:    beq $a5, $zero, .LBB0_1
+; LA32-NEXT:  # %bb.2:
+; LA32-NEXT:    srl.w $a0, $a4, $a0
+; LA32-NEXT:    ret
+;
 ; LA64-LABEL: atomicrmw_xchg_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a2, $zero, -4
@@ -32,6 +53,29 @@
 }
 
 define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA32-LABEL: atomicrmw_xchg_i16_acquire:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $a2, $zero, -4
+; LA32-NEXT:    and $a2, $a0, $a2
+; LA32-NEXT:    slli.w $a0, $a0, 3
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a0
+; LA32-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ll.w $a4, $a2, 0
+; LA32-NEXT:    addi.w $a5, $a1, 0
+; LA32-NEXT:    xor $a5, $a4, $a5
+; LA32-NEXT:    and $a5, $a5, $a3
+; LA32-NEXT:    xor $a5, $a4, $a5
+; LA32-NEXT:    sc.w $a5, $a2, 0
+; LA32-NEXT:    beq $a5, $zero, .LBB1_1
+; LA32-NEXT:  # %bb.2:
+; LA32-NEXT:    srl.w $a0, $a4, $a0
+; LA32-NEXT:    ret
+;
 ; LA64-LABEL: atomicrmw_xchg_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.w $a2, $zero, -4
@@ -61,6 +105,18 @@
 }
 
 define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA32-LABEL: atomicrmw_xchg_i32_acquire:
+; LA32:       # %bb.0:
+; LA32-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ll.w $a2, $a1, 0
+; LA32-NEXT:    move $a3, $a0
+; LA32-NEXT:    sc.w $a3, $a1, 0
+; LA32-NEXT:    beq $a3, $zero, .LBB2_1
+; LA32-NEXT:  # %bb.2:
+; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    ret
+;
 ; LA64-LABEL: atomicrmw_xchg_i32_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    amswap_db.w $a0, $a1, $a0
@@ -70,6 +126,16 @@
 }
 
 define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA32-LABEL: atomicrmw_xchg_i64_acquire:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    ori $a3, $zero, 2
+; LA32-NEXT:    bl %plt(__atomic_exchange_8)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
 ; LA64-LABEL: atomicrmw_xchg_i64_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    amswap_db.d $a0, $a1, $a0
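
For reference, a minimal standalone reproducer is sketched below (not part of the patch; the file and function names are illustrative). Fed to an llc built with this change, it exercises the two LA32 lowering paths covered by the updated test: the plain ll.w/sc.w loop for a naturally sized i32 exchange, and the masked loop for a sub-word i8 exchange.

; Illustrative reproducer, assuming llc was built with this patch applied:
;   llc --mtriple=loongarch32 xchg-example.ll -o -
define i32 @swap_word(ptr %p, i32 %v) nounwind {
  %old = atomicrmw xchg ptr %p, i32 %v acquire
  ret i32 %old
}

define i8 @swap_byte(ptr %p, i8 %v) nounwind {
  %old = atomicrmw xchg ptr %p, i8 %v acquire
  ret i8 %old
}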