Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -543,6 +543,7 @@
   if (Subtarget->supportsAddressTopByteIgnored())
     setTargetDAGCombine(ISD::LOAD);
 
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
   setTargetDAGCombine(ISD::MUL);
 
   setTargetDAGCombine(ISD::SELECT);
@@ -10205,6 +10206,25 @@
   return SDValue();
 }
 
+/// Try to convert a subtraction of a constant into an addition of the
+/// negated constant, so that ISel can select an atomic add instead (e.g.
+/// the LSE LDADD instructions); there is no atomic subtract instruction.
+static SDValue performAtomicLoadSubCombine(SDNode *N, SelectionDAG &DAG) {
+  // Only negate constants; for anything else a separate NEG would be needed
+  // anyway, so the rewrite would not be profitable.
+  if (!isa<ConstantSDNode>(N->getOperand(2)))
+    return SDValue();
+
+  SDLoc dl(N);
+  MVT VT = N->getSimpleValueType(0);
+  SDValue RHS = N->getOperand(2);
+  AtomicSDNode *AN = cast<AtomicSDNode>(N);
+  // Rewrite x - C as x + (0 - C); reuse the original memory operand so the
+  // atomic ordering and alias information survive the rewrite.
+  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, VT, N->getOperand(0),
+                       N->getOperand(1), RHS, AN->getMemOperand());
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10248,6 +10268,8 @@
     if (performTBISimplification(N->getOperand(1), DCI, DAG))
       return SDValue(N, 0);
     break;
+  case ISD::ATOMIC_LOAD_SUB:
+    return performAtomicLoadSubCombine(N, DAG);
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:
Index: test/CodeGen/AArch64/atomic-ops-lse.ll
===================================================================
--- test/CodeGen/AArch64/atomic-ops-lse.ll
+++ test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -765,6 +765,62 @@
   ret void
 }
 
+define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+  %old = atomicrmw sub i8* @var8, i8 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+  %old = atomicrmw sub i16* @var16, i16 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+  %old = atomicrmw sub i32* @var32, i32 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+  %old = atomicrmw sub i64* @var64, i64 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
Index: test/CodeGen/AArch64/atomic-ops.ll
===================================================================
--- test/CodeGen/AArch64/atomic-ops.ll
+++ test/CodeGen/AArch64/atomic-ops.ll
@@ -172,6 +172,74 @@
   ret i64 %old
 }
 
+define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+  %old = atomicrmw sub i8* @var8, i8 -1 monotonic
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], #1
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+  %old = atomicrmw sub i16* @var16, i16 -1 release
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], #1
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+  %old = atomicrmw sub i32* @var32, i32 -1 acquire
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], #1
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+  %old = atomicrmw sub i64* @var64, i64 -1 seq_cst
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], #1
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i8:
   %old = atomicrmw and i8* @var8, i8 %offset release