diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -33,4 +33,6 @@
 defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -51,6 +51,10 @@
                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                          bool IsMasked, int Width,
                          MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
+                            MachineBasicBlock::iterator &NextMBBI);
 };

 char LoongArchExpandAtomicPseudo::ID = 0;
@@ -114,6 +118,12 @@
   case LoongArch::PseudoAtomicLoadXor32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                              NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+                                NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+                                NextMBBI);
   }
   return false;
 }
@@ -316,6 +326,111 @@
   return true;
 }

+bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  assert(IsMasked == true &&
+         "Should only need to expand masked atomic max/min");
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
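+  // The expansion is an LL/SC retry loop: LoopHead either falls through to
+  // LoopIfBody (the loaded field must be replaced by the incoming value) or
+  // branches directly to LoopTail (the current value already wins the unsigned
+  // comparison), and LoopTail branches back to LoopHead whenever the SC.W fails.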
+  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register Scratch1Reg = MI.getOperand(1).getReg();
+  Register Scratch2Reg = MI.getOperand(2).getReg();
+  Register AddrReg = MI.getOperand(3).getReg();
+  Register IncrReg = MI.getOperand(4).getReg();
+  Register MaskReg = MI.getOperand(5).getReg();
+
+  //
+  // .loophead:
+  //   ll.w destreg, (alignedaddr)
+  //   and scratch2, destreg, mask
+  //   move scratch1, destreg
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
+      .addReg(DestReg)
+      .addReg(MaskReg);
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+      .addReg(DestReg)
+      .addReg(LoongArch::R0);
+
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  // bgeu scratch2, incr, .looptail
+  case AtomicRMWInst::UMax:
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+        .addReg(Scratch2Reg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  // bgeu incr, scratch2, .looptail
+  case AtomicRMWInst::UMin:
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+        .addReg(IncrReg)
+        .addReg(Scratch2Reg)
+        .addMBB(LoopTailMBB);
+    break;
+    // TODO: support other AtomicRMWInst.
+  }
+
+  // .loopifbody:
+  //   xor scratch1, destreg, incr
+  //   and scratch1, scratch1, mask
+  //   xor scratch1, destreg, scratch1
+  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+                    MaskReg, Scratch1Reg);
+
+  // .looptail:
+  //   sc.w scratch1, scratch1, (addr)
+  //   beq scratch1, zero, loop
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
+      .addReg(Scratch1Reg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
+      .addReg(Scratch1Reg)
+      .addReg(LoongArch::R0)
+      .addMBB(LoopHeadMBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
 } // end namespace

 INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1912,6 +1912,10 @@
     return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
   case AtomicRMWInst::Nand:
     return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
+  case AtomicRMWInst::UMax:
+    return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
+  case AtomicRMWInst::UMin:
+    return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
   // TODO: support other AtomicRMWInst.
   }
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1130,6 +1130,20 @@
 def PseudoAtomicLoadOr32 : PseudoAM;
 def PseudoAtomicLoadXor32 : PseudoAM;
+
+class PseudoMaskedAMUMinUMax
+    : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+                    "@earlyclobber $scratch2";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
+
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
           (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;

@@ -1169,6 +1183,20 @@
           (AMXOR_DB_W GPR:$rk, GPR:$rj)>;
 def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk),
           (AMXOR_DB_D GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_DU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
+                PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
+                PseudoMaskedAtomicLoadUMin32>;
 } // Predicates = [IsLA64]

 def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+;; TODO: Testing for LA32 architecture will be added later
+
+define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a6, $a1, .LBB0_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB0_3: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB0_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a6, $a1, .LBB1_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB1_3: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB1_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.wu $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.du $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i64 %b acquire
+  ret i64 %1
+}
+
+define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a1, $a6, .LBB4_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB4_3: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB4_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a1, $a6, .LBB5_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB5_3: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beq $a5, $zero, .LBB5_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.wu $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.du $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i64 %b acquire
+  ret i64 %1
+}
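
;; Not part of the patch above: a minimal, hand-written sketch of the form the
;; i8/i16 cases are expected to take after AtomicExpand, i.e. a call to one of
;; the masked intrinsics added in IntrinsicsLoongArch.td, which the new
;; AtomicPat rules then select to PseudoMaskedAtomicLoadUMax32/UMin32. The
;; ".p0" overload suffix and the ordering operand value below are assumptions,
;; not generated output.
declare i64 @llvm.loongarch.masked.atomicrmw.umax.i64.p0(ptr, i64, i64, i64)

define i64 @masked_umax_sketch(ptr %aligned, i64 %shifted_val, i64 %mask) {
  ; Operands: aligned 32-bit word address, operand pre-shifted into its field,
  ; the field mask, and an immediate encoding the ordering (assumed acquire).
  %old = call i64 @llvm.loongarch.masked.atomicrmw.umax.i64.p0(ptr %aligned, i64 %shifted_val, i64 %mask, i64 4)
  ret i64 %old
}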