diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -30,4 +30,7 @@ } defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics; } // TargetPrefix = "loongarch" diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp --- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -87,6 +87,33 @@ case LoongArch::PseudoAtomicSwap32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32, NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI); + case LoongArch::PseudoAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadNand64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64, + NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadAnd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadOr32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI); + case LoongArch::PseudoAtomicLoadXor32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32, + NextMBBI); } return false; } @@ -104,12 +131,13 @@ // .loop: // dbar 0 - // ll.w dest, (addr) + // ll.[w|d] dest, (addr) // binop scratch, dest, val - // sc.w scratch, scratch, (addr) + // sc.[w|d] scratch, scratch, (addr) // beq scratch, zero, loop BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? 
LoongArch::LL_W : LoongArch::LL_D), DestReg) .addReg(AddrReg) .addImm(0); switch (BinOp) { @@ -120,8 +148,42 @@ .addReg(IncrReg) .addReg(LoongArch::R0); break; + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(LoongArch::XORI), ScratchReg) + .addReg(ScratchReg) + .addImm(-1); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::And: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Or: + BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Xor: + BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; } - BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg) + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg) .addReg(ScratchReg) .addReg(AddrReg) .addImm(0); @@ -183,6 +245,23 @@ .addReg(IncrReg) .addImm(0); break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(LoongArch::XORI), ScratchReg) + .addReg(ScratchReg) + .addImm(-1); // TODO: support other AtomicRMWInst. } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1798,6 +1798,9 @@ default: return false; case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: + case Intrinsic::loongarch_masked_atomicrmw_add_i32: + case Intrinsic::loongarch_masked_atomicrmw_sub_i32: + case Intrinsic::loongarch_masked_atomicrmw_nand_i32: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); @@ -1828,6 +1831,12 @@ llvm_unreachable("Unexpected AtomicRMW BinOp"); case AtomicRMWInst::Xchg: return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; + case AtomicRMWInst::Add: + return Intrinsic::loongarch_masked_atomicrmw_add_i64; + case AtomicRMWInst::Sub: + return Intrinsic::loongarch_masked_atomicrmw_sub_i64; + case AtomicRMWInst::Nand: + return Intrinsic::loongarch_masked_atomicrmw_nand_i64; // TODO: support other AtomicRMWInst. } } @@ -1838,6 +1847,12 @@ llvm_unreachable("Unexpected AtomicRMW BinOp"); case AtomicRMWInst::Xchg: return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; + case AtomicRMWInst::Add: + return Intrinsic::loongarch_masked_atomicrmw_add_i32; + case AtomicRMWInst::Sub: + return Intrinsic::loongarch_masked_atomicrmw_sub_i32; + case AtomicRMWInst::Nand: + return Intrinsic::loongarch_masked_atomicrmw_nand_i32; // TODO: support other AtomicRMWInst. 
     }
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1103,6 +1103,9 @@
 }
 
 def PseudoMaskedAtomicSwap32 : PseudoMaskedAM;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAM;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAM;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAM;
 
 class PseudoAM : Pseudo<(outs GPR:$res, GPR:$scratch),
                         (ins GPR:$addr, GPR:$incr), []> {
@@ -1113,6 +1116,13 @@
 }
 
 def PseudoAtomicSwap32 : PseudoAM;
+def PseudoAtomicLoadNand32 : PseudoAM;
+def PseudoAtomicLoadNand64 : PseudoAM;
+def PseudoAtomicLoadAdd32 : PseudoAM;
+def PseudoAtomicLoadSub32 : PseudoAM;
+def PseudoAtomicLoadAnd32 : PseudoAM;
+def PseudoAtomicLoadOr32 : PseudoAM;
+def PseudoAtomicLoadXor32 : PseudoAM;
 
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
@@ -1125,13 +1135,60 @@
           (AMSWAP_DB_W GPR:$incr, GPR:$addr)>;
 def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr),
           (AMSWAP_DB_D GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_load_add_64 GPR:$rj, GPR:$rk),
+          (AMADD_DB_D GPR:$rk, GPR:$rj)>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_add_i64,
+                PseudoMaskedAtomicLoadAdd32>;
+def : Pat<(atomic_load_sub_32 GPR:$rj, GPR:$rk),
+          (AMADD_DB_W (SUB_W R0, GPR:$rk), GPR:$rj)>;
+def : Pat<(atomic_load_sub_64 GPR:$rj, GPR:$rk),
+          (AMADD_DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i64,
+                PseudoMaskedAtomicLoadSub32>;
+def : Pat<(atomic_load_nand_64 GPR:$rj, GPR:$rk),
+          (PseudoAtomicLoadNand64 GPR:$rk, GPR:$rj)>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i64,
+                PseudoMaskedAtomicLoadNand32>;
+def : Pat<(atomic_load_add_32 GPR:$rj, GPR:$rk),
+          (AMADD_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_and_32 GPR:$rj, GPR:$rk),
+          (AMAND_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_and_64 GPR:$rj, GPR:$rk),
+          (AMAND_DB_D GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_or_32 GPR:$rj, GPR:$rk),
+          (AMOR_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_or_64 GPR:$rj, GPR:$rk),
+          (AMOR_DB_D GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk),
+          (AMXOR_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk),
+          (AMXOR_DB_D GPR:$rk, GPR:$rj)>;
 } // Predicates = [IsLA64]
 
+def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
+          (PseudoAtomicLoadNand32 GPR:$rk, GPR:$rj)>;
+
 let Predicates = [IsLA32] in {
 def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i32,
                 PseudoMaskedAtomicSwap32>;
 def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
           (PseudoAtomicSwap32 GPR:$incr, GPR:$addr)>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_add_i32,
+                PseudoMaskedAtomicLoadAdd32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i32,
+                PseudoMaskedAtomicLoadSub32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i32,
+                PseudoMaskedAtomicLoadNand32>;
+def : Pat<(atomic_load_add_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadAdd32 GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_load_sub_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadSub32 GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_load_and_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadAnd32 GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_load_or_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadOr32 GPR:$incr, GPR:$addr)>;
+def : Pat<(atomic_load_xor_32 GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadXor32 GPR:$incr, GPR:$addr)>;
 } // Predicates = [IsLA32]
 
 /// Other pseudo-instructions
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -143,3 +143,778 @@
   %1 = atomicrmw xchg ptr %a, i64 %b acquire
   ret i64 %1
 }
+
+define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA32-LABEL: atomicrmw_add_i8_acquire:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $a2, $zero, -4
+; LA32-NEXT:    and $a2, $a0, $a2
+; LA32-NEXT:    slli.w $a0, $a0, 3
+; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB4_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB4_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB5_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB5_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: add.w $a3, $a2, $a0 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB6_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b acquire + ret i32 %1 +} + 
+define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB8_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB8_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB9_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, 
$a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB9_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: sub.w $a3, $a2, $a0 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB10_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a1, $zero, $a1 +; LA64-NEXT: amadd_db.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a1, $zero, $a1 +; LA64-NEXT: amadd_db.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: xori $a5, $a5, -1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB12_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: xori $a5, $a5, -1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB12_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 
0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: xori $a5, $a5, -1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beq $a5, $zero, .LBB13_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: xori $a5, $a5, -1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beq $a5, $zero, .LBB13_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: and $a3, $a2, $a0 +; LA32-NEXT: xori $a3, $a3, -1 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB14_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a2, $a1, 0 +; LA64-NEXT: and $a3, $a2, $a0 +; LA64-NEXT: xori $a3, $a3, -1 +; LA64-NEXT: sc.w $a3, $a1, 0 +; LA64-NEXT: beq $a3, $zero, .LBB14_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.d $a2, $a1, 0 +; LA64-NEXT: and $a3, $a2, $a0 +; LA64-NEXT: xori $a3, $a3, -1 +; LA64-NEXT: sc.d $a3, $a1, 0 +; LA64-NEXT: beq $a3, $zero, .LBB15_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: and $a4, $a3, $a0 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB16_1 
+; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a0, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: and $a4, $a2, $a0 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB17_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a0, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a0, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: and $a3, $a2, $a0 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB18_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: or $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB20_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; 
LA64-LABEL: atomicrmw_or_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a1, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: or $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB21_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a1, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: or $a3, $a2, $a0 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB22_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: xor $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB24_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a1, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: 
atomicrmw_xor_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a1, 0 +; LA32-NEXT: xor $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a1, 0 +; LA32-NEXT: beq $a4, $zero, .LBB25_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a1, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a1, 0 +; LA32-NEXT: xor $a3, $a2, $a0 +; LA32-NEXT: sc.w $a3, $a1, 0 +; LA32-NEXT: beq $a3, $zero, .LBB26_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b acquire + ret i64 %1 +}
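
Note on the test updates: the CHECK lines in atomicrmw.ll above are in the llvm/utils/update_llc_test_checks.py format. Assuming the file's existing RUN lines (llc --mtriple=loongarch32 and llc --mtriple=loongarch64 piped into FileCheck with the LA32/LA64 prefixes) and a locally built llc (the build path below is only an example, not part of this patch), they can be regenerated after a rebase with:

  llvm/utils/update_llc_test_checks.py --llc-binary ./build/bin/llc \
      llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll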