diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3133,6 +3133,23 @@
     Results.push_back(Res.getValue(1));
     break;
   }
+  case ISD::ATOMIC_LOAD_SUB: {
+    SDLoc DL(Node);
+    EVT VT = Node->getValueType(0);
+    SDValue RHS = Node->getOperand(2);
+    AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+    if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+      RHS = RHS->getOperand(0);
+    SDValue NewRHS =
+        DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+    SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+                                Node->getOperand(0), Node->getOperand(1),
+                                NewRHS, AN->getMemOperand());
+    Results.push_back(Res);
+    Results.push_back(Res.getValue(1));
+    break;
+  }
   case ISD::DYNAMIC_STACKALLOC:
     ExpandDYNAMIC_STACKALLOC(Node, Results);
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1097,7 +1097,6 @@
   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -794,8 +794,13 @@
   setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
 
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
 
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, LibCall);
+  } else {
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+  }
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -6109,8 +6114,6 @@
   case ISD::VECREDUCE_FMAXIMUM:
   case ISD::VECREDUCE_FMINIMUM:
     return LowerVECREDUCE(Op, DAG);
-  case ISD::ATOMIC_LOAD_SUB:
-    return LowerATOMIC_LOAD_SUB(Op, DAG);
   case ISD::ATOMIC_LOAD_AND:
     return LowerATOMIC_LOAD_AND(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
@@ -13732,23 +13735,6 @@
   }
 }
 
-SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
-    return SDValue();
-
-  // LSE has an atomic load-add instruction, but not a load-sub.
-  SDLoc dl(Op);
-  MVT VT = Op.getSimpleValueType();
-  SDValue RHS = Op.getOperand(2);
-  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
-  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
-  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
-                       Op.getOperand(0), Op.getOperand(1), RHS,
-                       AN->getMemOperand());
-}
-
 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
                                                     SelectionDAG &DAG) const {
   auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1349,19 +1349,19 @@
       setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                         Subtarget->hasAnyDataBarrier() ? Custom : Expand);
 
-      // Set them all for expansion, which will force libcalls.
-      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
-      setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+      // Set them all to LibCall, which will force libcalls.
+      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+      setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
       // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
       // Unordered/Monotonic case.
       if (!InsertFencesForAtomic) {
diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
--- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -127,19 +127,19 @@
   if (!Subtarget.useSoftFloat())
     setMips16HardFloatLibCalls();
 
-  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, LibCall);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
 
   setOperationAction(ISD::ROTR, MVT::i32, Expand);
   setOperationAction(ISD::ROTR, MVT::i64, Expand);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1229,14 +1229,17 @@
     }
   }
 
+  if (Subtarget.hasStdExtA())
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+
   if (Subtarget.hasForcedAtomics()) {
-    // Set atomic rmw/cas operations to expand to force __sync libcalls.
+    // Force __sync libcalls to be emitted for atomic rmw/cas operations.
     setOperationAction(
         {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
          ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
          ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
          ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
-        XLenVT, Expand);
+        XLenVT, LibCall);
   }
 
   if (Subtarget.hasVendorXTHeadMemIdx()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -168,17 +168,6 @@
 defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
 defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
 
-def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
-          (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
-          (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
 /// Pseudo AMOs
 
 class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
@@ -338,19 +327,6 @@
 defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
 defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
 
-/// 64-bit AMOs
-
-def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
-          (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
-          (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
 /// 64-bit pseudo AMOs
 
 let Size = 20 in
diff --git a/llvm/test/CodeGen/Mips/atomicops.ll b/llvm/test/CodeGen/Mips/atomicops.ll
--- a/llvm/test/CodeGen/Mips/atomicops.ll
+++ b/llvm/test/CodeGen/Mips/atomicops.ll
@@ -12,6 +12,15 @@
 ; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
 }
 
+define i32 @atomic_load_sub(ptr %mem, i32 %val, i32 %c) nounwind {
+; 16-LABEL: atomic_load_sub:
+; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
+; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_sub_4)(${{[0-9]+}})
+entry:
+  %0 = atomicrmw sub ptr %mem, i32 %val seq_cst
+  ret i32 %0
+}
+
 define i32 @main() nounwind {
 entry:
   %x = alloca i32, align 4
@@ -37,5 +46,3 @@
 }
 
 declare i32 @printf(ptr nocapture, ...) nounwind
-
-
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA %s
+
+define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_constant:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    li a2, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_constant:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    li a1, -1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_constant:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    li a1, -1
+; RV64IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT:    ret
+  %1 = atomicrmw sub ptr %a, i32 1 seq_cst
+  ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_constant:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    li a2, 0
+; RV32I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_constant:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 1
+; RV32IA-NEXT:    li a3, 5
+; RV32IA-NEXT:    li a2, 0
+; RV32IA-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_constant:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    li a1, -1
+; RV64IA-NEXT:    amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT:    ret
+  %1 = atomicrmw sub ptr %a, i64 1 seq_cst
+  ret i64 %1
+}
+
+define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_neg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sub a1, a1, a2
+; RV32I-NEXT:    li a2, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_neg:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    sub a2, a2, a1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a2, (a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_neg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    subw a1, a1, a2
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_neg:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    sub a2, a2, a1
+; RV64IA-NEXT:    amoadd.w.aqrl a0, a2, (a0)
+; RV64IA-NEXT:    ret
+  %b = sub i32 %x, %y
+  %1 = atomicrmw sub ptr %a, i32 %b seq_cst
+  ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_neg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sltu a5, a1, a3
+; RV32I-NEXT:    sub a2, a2, a4
+; RV32I-NEXT:    sub a2, a2, a5
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_neg:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    sltu a5, a1, a3
+; RV32IA-NEXT:    sub a2, a2, a4
+; RV32IA-NEXT:    sub a2, a2, a5
+; RV32IA-NEXT:    sub a1, a1, a3
+; RV32IA-NEXT:    li a3, 5
+; RV32IA-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_neg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sub a1, a1, a2
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_neg:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    sub a2, a2, a1
+; RV64IA-NEXT:    amoadd.d.aqrl a0, a2, (a0)
+; RV64IA-NEXT:    ret
+  %b = sub i64 %x, %y
+  %1 = atomicrmw sub ptr %a, i64 %b seq_cst
+  ret i64 %1
+}