diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -147,14 +147,18 @@
   Register ScratchReg = MI.getOperand(1).getReg();
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
   // .loop:
-  //   dbar 0
+  //   if(Ordering != AtomicOrdering::Monotonic)
+  //     dbar 0
   //   ll.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   beqz scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  if (Ordering != AtomicOrdering::Monotonic)
+    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL,
           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
       .addReg(AddrReg)
@@ -241,9 +245,12 @@
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
   Register MaskReg = MI.getOperand(4).getReg();
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
   // .loop:
-  //   dbar 0
+  //   if(Ordering != AtomicOrdering::Monotonic)
+  //     dbar 0
   //   ll.w destreg, (alignedaddr)
   //   binop scratch, destreg, incr
   //   xor scratch, destreg, scratch
@@ -251,7 +258,8 @@
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
   //   beqz scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  if (Ordering != AtomicOrdering::Monotonic)
+    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1296,7 +1296,7 @@
 def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAM;
 class PseudoAM : Pseudo<(outs GPR:$res, GPR:$scratch),
-                        (ins GPR:$addr, GPR:$incr)> {
+                        (ins GPR:$addr, GPR:$incr, grlenimm:$ordering)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1312,6 +1312,18 @@
 def PseudoAtomicLoadOr32 : PseudoAM;
 def PseudoAtomicLoadXor32 : PseudoAM;
+multiclass PseudoBinPat<string Op, Pseudo BinInst> {
+  def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$incr),
+            (BinInst GPR:$addr, GPR:$incr, 2)>;
+  def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$incr),
+            (BinInst GPR:$addr, GPR:$incr, 4)>;
+  def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$incr),
+            (BinInst GPR:$addr, GPR:$incr, 5)>;
+  def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$incr),
+            (BinInst GPR:$addr, GPR:$incr, 6)>;
+  def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$incr),
+            (BinInst GPR:$addr, GPR:$incr, 7)>;
+}
 class PseudoMaskedAMUMinUMax
     : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
@@ -1371,8 +1383,7 @@
            (AMADD_DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>;
 def : AtomicPat;
-def : Pat<(atomic_load_nand_64 GPR:$rj, GPR:$rk),
-          (PseudoAtomicLoadNand64 GPR:$rj, GPR:$rk)>;
+defm : PseudoBinPat<"atomic_load_nand_64", PseudoAtomicLoadNand64>;
 def : AtomicPat;
 def : Pat<(atomic_load_add_32 GPR:$rj, GPR:$rk),
@@ -1414,30 +1425,23 @@
           (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
 } // Predicates = [IsLA64]
-def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
-          (PseudoAtomicLoadNand32 GPR:$rj, GPR:$rk)>;
+defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>;
let Predicates = [IsLA32] in { def : AtomicPat; -def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr), - (PseudoAtomicSwap32 GPR:$incr, GPR:$addr)>; +defm : PseudoBinPat<"atomic_swap_32", PseudoAtomicSwap32>; def : AtomicPat; def : AtomicPat; def : AtomicPat; -def : Pat<(atomic_load_add_32 GPR:$addr, GPR:$incr), - (PseudoAtomicLoadAdd32 GPR:$incr, GPR:$addr)>; -def : Pat<(atomic_load_sub_32 GPR:$addr, GPR:$incr), - (PseudoAtomicLoadSub32 GPR:$incr, GPR:$addr)>; -def : Pat<(atomic_load_and_32 GPR:$addr, GPR:$incr), - (PseudoAtomicLoadAnd32 GPR:$incr, GPR:$addr)>; -def : Pat<(atomic_load_or_32 GPR:$addr, GPR:$incr), - (PseudoAtomicLoadOr32 GPR:$incr, GPR:$addr)>; -def : Pat<(atomic_load_xor_32 GPR:$addr, GPR:$incr), - (PseudoAtomicLoadXor32 GPR:$incr, GPR:$addr)>; +defm : PseudoBinPat<"atomic_load_add_32", PseudoAtomicLoadAdd32>; +defm : PseudoBinPat<"atomic_load_sub_32", PseudoAtomicLoadSub32>; +defm : PseudoBinPat<"atomic_load_and_32", PseudoAtomicLoadAnd32>; +defm : PseudoBinPat<"atomic_load_or_32", PseudoAtomicLoadOr32>; +defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; } // Predicates = [IsLA32] /// Intrinsics diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll @@ -109,9 +109,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: move $a3, $a0 -; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB2_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -253,9 +253,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: add.w $a3, $a2, $a0 -; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB6_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -397,9 +397,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: sub.w $a3, $a2, $a0 -; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB10_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -612,9 +612,9 @@ ; LA32-NEXT: and $a0, $a0, $a3 ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a1, 0 -; LA32-NEXT: and $a4, $a3, $a0 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB16_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a2 @@ -651,9 +651,9 @@ ; LA32-NEXT: and $a0, $a0, $a2 ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: and $a4, $a2, $a0 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB17_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a2, $a3 @@ -682,9 +682,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: and $a3, $a2, $a0 -; LA32-NEXT: sc.w 
$a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB18_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -729,9 +729,9 @@ ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a1, 0 -; LA32-NEXT: or $a4, $a3, $a2 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 ; LA32-NEXT: beqz $a4, .LBB20_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 @@ -761,9 +761,9 @@ ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a1, 0 -; LA32-NEXT: or $a4, $a3, $a2 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 ; LA32-NEXT: beqz $a4, .LBB21_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 @@ -788,9 +788,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: or $a3, $a2, $a0 -; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB22_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -835,9 +835,9 @@ ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a1, 0 -; LA32-NEXT: xor $a4, $a3, $a2 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 ; LA32-NEXT: beqz $a4, .LBB24_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 @@ -867,9 +867,9 @@ ; LA32-NEXT: sll.w $a1, $a1, $a0 ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a1, 0 -; LA32-NEXT: xor $a4, $a3, $a2 -; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 ; LA32-NEXT: beqz $a4, .LBB25_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: srl.w $a0, $a3, $a0 @@ -894,9 +894,9 @@ ; LA32: # %bb.0: ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a2, $a1, 0 -; LA32-NEXT: xor $a3, $a2, $a0 -; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 ; LA32-NEXT: beqz $a3, .LBB26_1 ; LA32-NEXT: # %bb.2: ; LA32-NEXT: move $a0, $a2 @@ -930,3 +930,901 @@ %1 = atomicrmw xor ptr %a, i64 %b acquire ret i64 %1 } + +define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB28_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; 
LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB28_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB29_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB29_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB30_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_monotonic: +; LA32: # 
%bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB32_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB32_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB33_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB33_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB34_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 
%b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB36_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB36_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB37_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; 
LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB37_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB38_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB40_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB40_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, 
$a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB41_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB41_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB42_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB42_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB43_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB44_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_monotonic: +; LA64: # 
%bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB45_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB46_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB48_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; 
LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB49_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB50_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB52_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; 
LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB53_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB54_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b monotonic + ret i64 %1 +}
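Note (not part of the patch itself): the immediates 2/4/5/6/7 that PseudoBinPat attaches as the new $ordering operand are the underlying numeric values of LLVM's AtomicOrdering enum (Monotonic = 2, Acquire = 4, Release = 5, AcquireRelease = 6, SequentiallyConsistent = 7), which is why LoongArchExpandAtomicPseudoInsts.cpp can recover the ordering with a plain static_cast and only has to compare against Monotonic to decide whether the leading "dbar 0" is emitted. A minimal standalone C++ sketch of that correspondence follows; the enum mirrors llvm/Support/AtomicOrdering.h, and needsLeadingDBAR is a hypothetical helper for illustration, not a function from the patch.

#include <cassert>
#include <cstdint>

// Mirrors the numeric values of llvm::AtomicOrdering; the TableGen patterns
// above encode these values directly into the $ordering immediate operand.
enum class AtomicOrdering : uint8_t {
  NotAtomic = 0,
  Unordered = 1,
  Monotonic = 2,
  Acquire = 4,
  Release = 5,
  AcquireRelease = 6,
  SequentiallyConsistent = 7,
};

// Hypothetical helper: the expansion pass asks only one question of the
// ordering operand -- is a barrier needed before the ll.[w|d]?
static bool needsLeadingDBAR(uint64_t OrderingImm) {
  return static_cast<AtomicOrdering>(OrderingImm) != AtomicOrdering::Monotonic;
}

int main() {
  assert(!needsLeadingDBAR(2)); // monotonic: the "dbar 0" is dropped
  assert(needsLeadingDBAR(7));  // seq_cst: the "dbar 0" is kept
  return 0;
}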