diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4182,6 +4182,22 @@
 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
+  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
+  // mask, as this produces better code than the LL/SC loop emitted by
+  // int_loongarch_masked_atomicrmw_xchg.
+  if (AI->getOperation() == AtomicRMWInst::Xchg &&
+      isa<ConstantInt>(AI->getValOperand())) {
+    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
+    if (CVal->isZero())
+      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
+                                     Builder.CreateNot(Mask, "Inv_Mask"),
+                                     AI->getAlign(), Ord);
+    if (CVal->isMinusOne())
+      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
+                                     AI->getAlign(), Ord);
+  }
+
   unsigned GRLen = Subtarget.getGRLen();
   Value *Ordering =
       Builder.getIntN(GRLen, static_cast<unsigned>(AI->getOrdering()));
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -56,14 +56,12 @@
 ; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $zero, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a2
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB1_1
 ; LA32-NEXT:  # %bb.2:
@@ -75,18 +73,9 @@
 ; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    nor $a2, $a2, $zero
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $zero, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a2
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB1_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 0 acquire
@@ -103,10 +92,7 @@
 ; LA32-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $a2, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a2
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB2_1
 ; LA32-NEXT:  # %bb.2:
@@ -118,18 +104,8 @@
 ; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $a2, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a2
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB2_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 -1 acquire
@@ -193,14 +169,12 @@
 ; LA32-NEXT:    ori $a1, $a1, 4095
 ; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    nor $a1, $a1, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $zero, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a1
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB4_1
 ; LA32-NEXT:  # %bb.2:
@@ -213,18 +187,9 @@
 ; LA64-NEXT:    ori $a1, $a1, 4095
 ; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    nor $a1, $a1, $zero
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $zero, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a1
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB4_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 acquire
@@ -242,10 +207,7 @@
 ; LA32-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $a1, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a1
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    or $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB5_1
 ; LA32-NEXT:  # %bb.2:
@@ -258,18 +220,8 @@
 ; LA64-NEXT:    ori $a1, $a1, 4095
 ; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $a1, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a1
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB5_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 acquire
@@ -1131,13 +1083,11 @@
 ; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $zero, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a2
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB33_1
 ; LA32-NEXT:  # %bb.2:
@@ -1149,17 +1099,9 @@
 ; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    nor $a2, $a2, $zero
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $zero, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a2
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB33_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 0 monotonic
@@ -1175,10 +1117,7 @@
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $a2, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a2
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB34_1
 ; LA32-NEXT:  # %bb.2:
@@ -1190,17 +1129,8 @@
 ; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $a2, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a2
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB34_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i8 -1 monotonic
@@ -1262,13 +1192,11 @@
 ; LA32-NEXT:    ori $a1, $a1, 4095
 ; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    nor $a1, $a1, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $zero, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a1
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB36_1
 ; LA32-NEXT:  # %bb.2:
@@ -1281,17 +1209,9 @@
 ; LA64-NEXT:    ori $a1, $a1, 4095
 ; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    nor $a1, $a1, $zero
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $zero, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a1
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB36_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 monotonic
@@ -1308,10 +1228,7 @@
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    addi.w $a4, $a1, 0
-; LA32-NEXT:    xor $a4, $a3, $a4
-; LA32-NEXT:    and $a4, $a4, $a1
-; LA32-NEXT:    xor $a4, $a3, $a4
+; LA32-NEXT:    or $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB37_1
 ; LA32-NEXT:  # %bb.2:
@@ -1324,17 +1241,8 @@
 ; LA64-NEXT:    ori $a1, $a1, 4095
 ; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a4, $a1, 0
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    and $a4, $a4, $a1
-; LA64-NEXT:    xor $a4, $a3, $a4
-; LA64-NEXT:    sc.w $a4, $a0, 0
-; LA64-NEXT:    beqz $a4, .LBB37_1
-; LA64-NEXT:  # %bb.2:
+; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 monotonic
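
---
Note (illustration, not part of the patch): a minimal LLVM IR sketch of the
rewrite emitMaskedAtomicRMWIntrinsic now performs for subword xchg. Here
%aligned and %mask stand for the word-aligned address and in-word lane mask
that the masked-atomics expansion computes; the function names are
hypothetical.

  define i32 @xchg_zero_as_and(ptr %aligned, i32 %mask) {
    ; Exchanging constant 0 into the masked lane just clears the lane, so an
    ; atomic 'and' with the inverted mask returns the same old value without
    ; an LL/SC loop (a single amand_db.w on LA64).
    %inv = xor i32 %mask, -1
    %old = atomicrmw and ptr %aligned, i32 %inv acquire, align 4
    ret i32 %old
  }

  define i32 @xchg_minus_one_as_or(ptr %aligned, i32 %mask) {
    ; Exchanging constant -1 sets every bit of the lane, so an atomic 'or'
    ; with the mask is equivalent (amor_db.w on LA64).
    %old = atomicrmw or ptr %aligned, i32 %mask acquire, align 4
    ret i32 %old
  }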