diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -41,6 +41,8 @@
defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
+defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
// @llvm.loongarch.masked.cmpxchg.i64.<p>(
//   ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -133,6 +133,12 @@
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case LoongArch::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
}
return false;
}
@@ -341,6 +347,17 @@
return true;
}
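+// Sign-extend the sub-word value in ValReg in place: shift it left so its
+// sign bit lands in bit 31 of the 32-bit container, then arithmetic-shift it
+// back. ShamtReg holds the pseudo's $sextshamt operand, i.e.
+// GRLen - ShiftAmt - ValWidth; SLL_W/SRA_W only consume its low five bits.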
+static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, Register ValReg,
+ Register ShamtReg) {
+ BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
@@ -417,6 +434,22 @@
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
+ case AtomicRMWInst::Max:
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ // bge scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::Min:
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ // bge incr, scratch2, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
// TODO: support other AtomicRMWInst.
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2325,6 +2325,10 @@
return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
case AtomicRMWInst::UMin:
return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
+ case AtomicRMWInst::Max:
+ return Intrinsic::loongarch_masked_atomicrmw_max_i64;
+ case AtomicRMWInst::Min:
+ return Intrinsic::loongarch_masked_atomicrmw_min_i64;
// TODO: support other AtomicRMWInst.
}
}
@@ -2396,8 +2400,24 @@
Value *Result;
- Result =
- Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
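+ // For example, an i8 field at byte offset 1 has ShiftAmt = 8 and ValWidth = 8,
+ // so on LA64 we pass 64 - 8 - 8 = 48; the expansion's sll.w/sra.w only use the
+ // low five bits of that value, shifting by 16 to move the field's sign bit to
+ // bit 31 of its 32-bit container and back.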
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt =
+ Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
+ Result = Builder.CreateCall(LlwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ } else {
+ Result =
+ Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ }
if (GRLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1338,6 +1338,20 @@
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
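+// The signed min/max pseudos take one operand more than the unsigned ones
+// above: $sextshamt, the shift amount used to sign-extend the loaded sub-word
+// value in place before the signed comparison (see insertSext in
+// LoongArchExpandAtomicPseudoInsts.cpp).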
+class PseudoMaskedAMMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$sextshamt,
+ grlenimm:$ordering)> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
+
/// Compare and exchange
class PseudoCmpXchg
@@ -1362,6 +1376,12 @@
let hasSideEffects = 0;
}
+class PseudoMaskedAMMinMaxPat<Intrinsic intrin, Pseudo AMInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ timm:$ordering),
+ (AMInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ timm:$ordering)>;
+
class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1410,6 +1430,15 @@
def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
(AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_D GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_D GPR:$rk, GPR:$rj)>;
+
def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
                PseudoMaskedAtomicLoadUMax32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                PseudoMaskedAtomicLoadUMin32>;
def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
(PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
+                              PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
+                              PseudoMaskedAtomicLoadMin32>;
} // Predicates = [IsLA64]
defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -181,3 +181,199 @@
%1 = atomicrmw umin ptr %a, i64 %b acquire
ret i64 %1
}
+
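+; For i8/i16 the masked LL/SC loop sign-extends the loaded field in place
+; (sll.w/sra.w by 56-ShiftAmt for i8 and 48-ShiftAmt for i16; only the low
+; five bits are used) before the signed bge; i32/i64 lower directly to
+; ammax_db.w/ammax_db.d and ammin_db.w/ammin_db.d.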
+define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: xori $a4, $a4, 56
+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a4
+; LA64-NEXT: sra.w $a7, $a7, $a4
+; LA64-NEXT: bge $a7, $a1, .LBB8_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a3
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB8_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: ori $a4, $zero, 48
+; LA64-NEXT: sub.d $a3, $a4, $a3
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB9_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB9_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.w $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.d $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i64 %b acquire
+ ret i64 %1
+}
+
+define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: xori $a4, $a4, 56
+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a4
+; LA64-NEXT: sra.w $a7, $a7, $a4
+; LA64-NEXT: bge $a1, $a7, .LBB12_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a3
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB12_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: ori $a4, $zero, 48
+; LA64-NEXT: sub.d $a3, $a4, $a3
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB13_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB13_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.w $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.d $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i64 %b acquire
+ ret i64 %1
+}