diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1236,6 +1236,9 @@
                          XLenVT, Expand);
   }
 
+  if (Subtarget.hasStdExtA())
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Custom);
+
   if (Subtarget.hasVendorXTHeadMemIdx()) {
     for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
          ++im) {
@@ -2902,6 +2905,28 @@
   return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
 }
 
+/// Lower ATOMIC_LOAD_SUB by rewriting it as ATOMIC_LOAD_ADD of the negated
+/// value operand (0 - RHS). The chain, pointer, memory VT and memory operand
+/// of the original node are reused unchanged.
+static SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
+  SDValue RHS = Op.getOperand(2);
+  // Look through a sign extension of the value operand only when it extends
+  // from exactly the width being accessed: the bits above the memory width do
+  // not reach memory, but an extension from a narrower type does change the
+  // bits that are stored and must be kept.
+  if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+    RHS = RHS->getOperand(0);
+  SDValue NewRHS =
+      DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+                       Op.getOperand(0), Op.getOperand(1), NewRHS,
+                       AN->getMemOperand());
+}
+
 struct VIDSequence {
   int64_t StepNumerator;
   unsigned StepDenominator;
@@ -6097,6 +6122,8 @@
          !Subtarget.hasVInstructionsF16()))
       return SplitVPOp(Op, DAG);
     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+  case ISD::ATOMIC_LOAD_SUB:
+    return lowerATOMIC_LOAD_SUB(Op, DAG);
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -174,17 +174,6 @@
 defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
 defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
 
-def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
-          (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
-          (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
-          (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
 /// Pseudo AMOs
 
 class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
@@ -346,17 +335,6 @@
 
 /// 64-bit AMOs
 
-def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
-          (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
-          (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
-          (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
 /// 64-bit pseudo AMOs
 
 let Size = 20 in
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA %s
+
+define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_constant:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    li a2, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_constant:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    li a1, -1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_constant:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    li a1, -1
+; RV64IA-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT:    ret
+  %1 = atomicrmw sub ptr %a, i32 1 seq_cst
+  ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_constant:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    li a2, 0
+; RV32I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_constant:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 1
+; RV32IA-NEXT:    li a3, 5
+; RV32IA-NEXT:    li a2, 0
+; RV32IA-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_constant:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    li a1, -1
+; RV64IA-NEXT:    amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT:    ret
+  %1 = atomicrmw sub ptr %a, i64 1 seq_cst
+  ret i64 %1
+}
+
+define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_neg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sub a1, a1, a2
+; RV32I-NEXT:    li a2, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_neg:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    sub a2, a2, a1
+; RV32IA-NEXT:    amoadd.w.aqrl a0, a2, (a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_neg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    subw a1, a1, a2
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_4@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_neg:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    sub a2, a2, a1
+; RV64IA-NEXT:    amoadd.w.aqrl a0, a2, (a0)
+; RV64IA-NEXT:    ret
+  %b = sub i32 %x, %y
+  %1 = atomicrmw sub ptr %a, i32 %b seq_cst
+  ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_neg:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sltu a5, a1, a3
+; RV32I-NEXT:    sub a2, a2, a4
+; RV32I-NEXT:    sub a2, a2, a5
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_neg:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    sltu a5, a1, a3
+; RV32IA-NEXT:    sub a2, a2, a4
+; RV32IA-NEXT:    sub a2, a2, a5
+; RV32IA-NEXT:    sub a1, a1, a3
+; RV32IA-NEXT:    li a3, 5
+; RV32IA-NEXT:    call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_neg:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sub a1, a1, a2
+; RV64I-NEXT:    li a2, 5
+; RV64I-NEXT:    call __atomic_fetch_sub_8@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_neg:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    sub a2, a2, a1
+; RV64IA-NEXT:    amoadd.d.aqrl a0, a2, (a0)
+; RV64IA-NEXT:    ret
+  %b = sub i64 %x, %y
+  %1 = atomicrmw sub ptr %a, i64 %b seq_cst
+  ret i64 %1
+}