Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
@@ -596,6 +596,7 @@
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;

   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         std::vector<SDNode *> *Created) const override;
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -464,6 +464,8 @@
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);

   // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
   // This requires the Performance Monitors extension.
@@ -2679,6 +2681,8 @@
   case ISD::VECREDUCE_FMAX:
   case ISD::VECREDUCE_FMIN:
     return LowerVECREDUCE(Op, DAG);
+  case ISD::ATOMIC_LOAD_SUB:
+    return LowerATOMIC_LOAD_SUB(Op, DAG);
   }
 }

@@ -7373,6 +7377,23 @@
   }
 }

+SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+  if (!Subtarget.hasLSE())
+    return SDValue();
+
+  // LSE has an atomic load-add instruction, but not a load-sub.
+  SDLoc dl(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue RHS = Op.getOperand(2);
+  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
+  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
+                       Op.getOperand(0), Op.getOperand(1), RHS,
+                       AN->getMemOperand());
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
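For illustration only (not part of the patch): a minimal C++ sketch of the source-level pattern this lowering targets. Assuming a toolchain containing this change with LSE enabled (e.g. -march=armv8.1-a), a seq_cst fetch_sub can now compile to a negation plus a single ldaddal rather than an ldaxr/stlxr retry loop; exact output depends on the compiler build and options.

    // Illustration only: user code whose atomicrmw sub reaches this lowering.
    #include <atomic>
    #include <cstdio>

    std::atomic<long> counter{10};

    long take(long n) {
      // atomicrmw sub at the IR level; with LSE this can now become
      // neg + ldaddal instead of an ldaxr/stlxr loop.
      return counter.fetch_sub(n, std::memory_order_seq_cst);
    }

    int main() {
      long old = take(3);
      std::printf("old=%ld now=%ld\n", old, counter.load());
      return 0;
    }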
Index: llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -814,6 +814,118 @@
   ret void
 }

+define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+  %old = atomicrmw sub i8* @var8, i8 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+  %old = atomicrmw sub i16* @var16, i16 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+  %old = atomicrmw sub i32* @var32, i32 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+  %old = atomicrmw sub i64* @var64, i64 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg:
+  %neg = sub i8 0, %offset
+  %old = atomicrmw sub i8* @var8, i8 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg:
+  %neg = sub i16 0, %offset
+  %old = atomicrmw sub i16* @var16, i16 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg:
+  %neg = sub i32 0, %offset
+  %old = atomicrmw sub i32* @var32, i32 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg:
+  %neg = sub i64 0, %offset
+  %old = atomicrmw sub i64* @var64, i64 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
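A note on the tests above, with a runnable C++ sketch (illustration only) of the arithmetic the neg_imm cases rely on: the lowering rewrites the subtrahend as 0 - RHS, so a subtract of -1 folds to an add of +1, which is why those tests expect the immediate #0x1 to be materialized (orr wN, wzr, #0x1) before the ldaddal. In the neg_arg cases the explicit IR negation cancels with the lowering's negation, so the incoming argument register (w0/x0) feeds ldaddal directly.

    // Illustration only: the modular identity behind the neg_imm checks.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t c = static_cast<uint8_t>(-1);          // the i8 -1 operand
      uint8_t negated = static_cast<uint8_t>(0 - c); // the 0 - RHS rewrite
      assert(negated == 1); // sub of -1 == add of +1 (mod 2^8)
      return 0;
    }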