diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -41,6 +41,8 @@
 defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
+defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
 
 // @llvm.loongarch.masked.cmpxchg.i64.<p>(
 //   ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -133,6 +133,12 @@
     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
   case LoongArch::PseudoMaskedCmpXchg32:
     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+                                NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+                                NextMBBI);
   }
   return false;
 }
@@ -341,6 +347,17 @@
   return true;
 }
 
+static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
+                       MachineBasicBlock *MBB, Register ValReg,
+                       Register ShamtReg) {
+  BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
+      .addReg(ValReg)
+      .addReg(ShamtReg);
+  BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
+      .addReg(ValReg)
+      .addReg(ShamtReg);
+}
+
 bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
@@ -417,6 +434,22 @@
         .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
     break;
+  case AtomicRMWInst::Max:
+    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+    // bge scratch2, incr, .looptail
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+        .addReg(Scratch2Reg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  case AtomicRMWInst::Min:
+    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+    // bge incr, scratch2, .looptail
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+        .addReg(IncrReg)
+        .addReg(Scratch2Reg)
+        .addMBB(LoopTailMBB);
+    break;
     // TODO: support other AtomicRMWInst.
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2325,6 +2325,10 @@
     return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
   case AtomicRMWInst::UMin:
     return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
+  case AtomicRMWInst::Max:
+    return Intrinsic::loongarch_masked_atomicrmw_max_i64;
+  case AtomicRMWInst::Min:
+    return Intrinsic::loongarch_masked_atomicrmw_min_i64;
     // TODO: support other AtomicRMWInst.
   }
 }
@@ -2396,8 +2400,24 @@
 
   Value *Result;
 
-  Result =
-      Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+  // Must pass the shift amount needed to sign extend the loaded value prior
+  // to performing a signed comparison for min/max. ShiftAmt is the number of
+  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
+  // is the number of bits to left+right shift the value in order to
+  // sign-extend.
+  if (AI->getOperation() == AtomicRMWInst::Min ||
+      AI->getOperation() == AtomicRMWInst::Max) {
+    const DataLayout &DL = AI->getModule()->getDataLayout();
+    unsigned ValWidth =
+        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+    Value *SextShamt =
+        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
+    Result = Builder.CreateCall(LlwOpScwLoop,
+                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+  } else {
+    Result =
+        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+  }
 
   if (GRLen == 64)
     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
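
As a worked example of the shift arithmetic above (illustrative only, not part of the patch): on LA64, GRLen is 64, ValWidth is 8 or 16 for the narrow atomics, and ShiftAmt is eight times the value's byte offset within its aligned word, so the extra operand comes out to 56 - ShiftAmt for i8 and 48 - ShiftAmt for i16. A minimal C++ sketch with a hypothetical helper name:

#include <cassert>
#include <cstdint>

// Mirrors the IR built in emitMaskedAtomicRMWIntrinsic: ShiftAmt positions the
// field, and GRLen - ValWidth - ShiftAmt is the amount later used to sign-extend.
uint64_t computeSextShamt(uint64_t Addr, unsigned ValWidth,
                          unsigned GRLen = 64) {
  uint64_t ShiftAmt = (Addr & 3) * 8; // bit position within the aligned word
  return GRLen - ValWidth - ShiftAmt; // bits to shift left then right
}

int main() {
  assert(computeSextShamt(0x1000, 8) == 56);  // i8 at byte offset 0
  assert(computeSextShamt(0x1003, 8) == 32);  // i8 at byte offset 3
  assert(computeSextShamt(0x1002, 16) == 32); // i16 at byte offset 2
  return 0;
}

The generated code in the tests later in this patch folds the same computation into andi/xori against 56 for i8 and a sub.d from 48 for i16.
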
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1338,6 +1338,20 @@
 def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
 def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
 
+class PseudoMaskedAMMinMax
+    : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$sextshamt,
+              grlenimm:$ordering)> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+                    "@earlyclobber $scratch2";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
+
 /// Compare and exchange
 
 class PseudoCmpXchg
@@ -1362,6 +1376,12 @@
   let hasSideEffects = 0;
 }
 
+class PseudoMaskedAMMinMaxPat<Intrinsic intrin, Pseudo AMInst>
+    : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+           timm:$ordering),
+      (AMInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+           timm:$ordering)>;
+
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
       (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1410,6 +1430,15 @@
 def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
           (AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
 
+def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_D GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_D GPR:$rk, GPR:$rj)>;
+
 def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
                 PseudoMaskedAtomicLoadUMax32>;
 def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;
 def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
           (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
+                              PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
+                              PseudoMaskedAtomicLoadMin32>;
 } // Predicates = [IsLA64]
 
 defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>;
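
For reference, a minimal C++ model (not part of the patch; the function name is made up) of one iteration of the LL/SC loop that PseudoMaskedAtomicLoadMax32 expands to: isolate the field with the mask, sign-extend it in place with the sll.w/sra.w pair emitted by insertSext, compare it against the increment (which the IR has already sign-extended and shifted into position), and merge the new field back with the xor/and/xor sequence. The sketch assumes arithmetic right shift of negative values (guaranteed since C++20, universal in practice) and mirrors the hardware's use of only the low five bits of the shift amount.

#include <cstdint>

// One iteration of the expanded masked max loop; the ll.w/sc.w retry is not
// modeled. Loaded: word read by ll.w. Incr: value operand, already
// sign-extended and shifted into the field position. Mask: field mask
// (e.g. 0xff << shamt). SextShamt: the GRLen-ValWidth-ShiftAmt operand.
uint32_t maskedMaxStep(uint32_t Loaded, uint32_t Incr, uint32_t Mask,
                       uint32_t SextShamt) {
  unsigned Sh = SextShamt & 31; // sll.w/sra.w read only the low 5 bits
  int32_t Field =
      static_cast<int32_t>((Loaded & Mask) << Sh) >> Sh; // sll.w + sra.w
  if (Field >= static_cast<int32_t>(Incr))  // bge scratch2, incr, .looptail
    return Loaded;                          // keep the old word unchanged
  return Loaded ^ ((Loaded ^ Incr) & Mask); // xor/and/xor merge
}

The min expansion is identical except that the operands of the bge are swapped.
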
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -181,3 +181,199 @@
   %1 = atomicrmw umin ptr %a, i64 %b acquire
   ret i64 %1
 }
+
+define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    ext.w.b $a1, $a1
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    andi $a4, $a0, 24
+; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a5, $a2, 0
+; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    bge $a7, $a1, .LBB8_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT:    xor $a6, $a5, $a1
+; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    xor $a6, $a5, $a6
+; LA64-NEXT:  .LBB8_3: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT:    sc.w $a6, $a2, 0
+; LA64-NEXT:    beqz $a6, .LBB8_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.5:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    andi $a3, $a0, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ext.w.h $a1, $a1
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a5, $a2, 0
+; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    bge $a7, $a1, .LBB9_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT:    xor $a6, $a5, $a1
+; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    xor $a6, $a5, $a6
+; LA64-NEXT:  .LBB9_3: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT:    sc.w $a6, $a2, 0
+; LA64-NEXT:    beqz $a6, .LBB9_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.5:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.w $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.d $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i64 %b acquire
+  ret i64 %1
+}
+
+define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    ext.w.b $a1, $a1
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    andi $a4, $a0, 24
+; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a5, $a2, 0
+; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    bge $a1, $a7, .LBB12_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT:    xor $a6, $a5, $a1
+; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    xor $a6, $a5, $a6
+; LA64-NEXT:  .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT:    sc.w $a6, $a2, 0
+; LA64-NEXT:    beqz $a6, .LBB12_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.5:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    andi $a3, $a0, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ext.w.h $a1, $a1
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a5, $a2, 0
+; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    bge $a1, $a7, .LBB13_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT:    xor $a6, $a5, $a1
+; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    xor $a6, $a5, $a6
+; LA64-NEXT:  .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT:    sc.w $a6, $a2, 0
+; LA64-NEXT:    beqz $a6, .LBB13_1
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.5:
+; LA64-NEXT:    srl.w $a0, $a5, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.w $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.d $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i64 %b acquire
+  ret i64 %1
+}
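
At the IR level these tests exercise plain atomicrmw max/min. As a purely illustrative source-level equivalent of the i8 case (the helper name is ours; the backend emits the inline ll.w/sc.w loop checked above, not a CAS loop or a library call):

#include <algorithm>
#include <atomic>
#include <cstdint>

// Returns the previous value, matching atomicrmw semantics.
int8_t fetch_max_i8(std::atomic<int8_t> &A, int8_t V) {
  int8_t Old = A.load(std::memory_order_relaxed);
  while (!A.compare_exchange_weak(Old, std::max(Old, V),
                                  std::memory_order_acquire,
                                  std::memory_order_relaxed)) {
    // Old is reloaded on failure; retry with the updated value.
  }
  return Old;
}

The i32 and i64 cases need no loop at all: the new patterns select the ammax_db/ammin_db instructions directly, as the checks above show.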