diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -985,6 +985,7 @@ TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true); MBB.insert(MBBI, NewMI); MachineInstrBuilder MIB1(MF, NewMI); + MIB1->setPCSections(MF, MI.getPCSections()); MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define) .add(MI.getOperand(1)) .add(MI.getOperand(2)) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2341,10 +2341,10 @@ } case TargetOpcode::G_FENCE: { if (I.getOperand(1).getImm() == 0) - BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier)) + BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::CompilerBarrier)) .addImm(I.getOperand(0).getImm()); else - BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB)) + BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB)) .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb); I.eraseFromParent(); return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -0,0 +1,5529 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE-O1 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE-O0 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -mattr=+rcpc,+ldapr -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LDAPR-O1 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -mattr=+rcpc,+ldapr -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LDAPR-O0 + +define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB0_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection0: +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection1: +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection2: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB0_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection3: +; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB0_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB0_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection4: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection0: +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection1: +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection2: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB0_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection3: +; CHECK-NOLSE-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection4: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB0_1 +; CHECK-NOLSE-O0-NEXT: .LBB0_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB0_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection0: +; CHECK-LDAPR-O1-NEXT: ldaxr w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection1: +; CHECK-LDAPR-O1-NEXT: cmp w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection2: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB0_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection3: +; CHECK-LDAPR-O1-NEXT: stxr w9, w2, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB0_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB0_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection4: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection0: +; CHECK-LDAPR-O0-NEXT: ldaxr w0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection1: +; CHECK-LDAPR-O0-NEXT: cmp w0, w1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection2: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB0_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection3: +; CHECK-LDAPR-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection4: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB0_1 +; CHECK-LDAPR-O0-NEXT: .LBB0_3: +; CHECK-LDAPR-O0-NEXT: ret + %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 + %val = extractvalue { i32, i1 } %pair, 0 + ret i32 %val +} + +define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_from_load: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection5: +; CHECK-NOLSE-O1-NEXT: ldr w9, [x2] +; CHECK-NOLSE-O1-NEXT: .LBB1_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection6: +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection7: +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection8: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB1_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB1_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection9: +; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB1_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB1_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection10: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .Lpcsection5: +; CHECK-NOLSE-O0-NEXT: ldr w10, [x2] +; CHECK-NOLSE-O0-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection6: +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection7: +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection8: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB1_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB1_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection9: +; CHECK-NOLSE-O0-NEXT: stlxr w8, w10, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection10: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB1_1 +; CHECK-NOLSE-O0-NEXT: .LBB1_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap_from_load: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection5: +; CHECK-LDAPR-O1-NEXT: ldr w9, [x2] +; CHECK-LDAPR-O1-NEXT: .LBB1_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection6: +; CHECK-LDAPR-O1-NEXT: ldaxr w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection7: +; CHECK-LDAPR-O1-NEXT: cmp w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection8: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB1_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB1_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection9: +; CHECK-LDAPR-O1-NEXT: stxr w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB1_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB1_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection10: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap_from_load: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .Lpcsection5: +; CHECK-LDAPR-O0-NEXT: ldr w10, [x2] +; CHECK-LDAPR-O0-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection6: +; CHECK-LDAPR-O0-NEXT: ldaxr w0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection7: +; CHECK-LDAPR-O0-NEXT: cmp w0, w1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection8: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB1_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB1_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection9: +; CHECK-LDAPR-O0-NEXT: stlxr w8, w10, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection10: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB1_1 +; CHECK-LDAPR-O0-NEXT: .LBB1_3: +; CHECK-LDAPR-O0-NEXT: ret + %new = load i32, i32* %pnew, !pcsections !0 + %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 + %val = extractvalue { i32, i1 } %pair, 0 + ret i32 %val +} + +define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_rel: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB2_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection11: +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection12: +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection13: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB2_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB2_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection14: +; CHECK-NOLSE-O1-NEXT: stlxr w9, w2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB2_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB2_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection15: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection11: +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection12: +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection13: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB2_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection14: +; CHECK-NOLSE-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection15: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB2_1 +; CHECK-NOLSE-O0-NEXT: .LBB2_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap_rel: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB2_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection11: +; CHECK-LDAPR-O1-NEXT: ldaxr w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection12: +; CHECK-LDAPR-O1-NEXT: cmp w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection13: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB2_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB2_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection14: +; CHECK-LDAPR-O1-NEXT: stlxr w9, w2, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB2_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB2_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection15: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap_rel: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection11: +; CHECK-LDAPR-O0-NEXT: ldaxr w0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection12: +; CHECK-LDAPR-O0-NEXT: cmp w0, w1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection13: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB2_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection14: +; CHECK-LDAPR-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection15: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB2_1 +; CHECK-LDAPR-O0-NEXT: .LBB2_3: +; CHECK-LDAPR-O0-NEXT: ret + %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic, !pcsections !0 + %val = extractvalue { i32, i1 } %pair, 0 + ret i32 %val +} + +define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB3_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection16: +; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection17: +; CHECK-NOLSE-O1-NEXT: cmp x8, x1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection18: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB3_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB3_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection19: +; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB3_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB3_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection20: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection16: +; CHECK-NOLSE-O0-NEXT: ldaxr x0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection17: +; CHECK-NOLSE-O0-NEXT: cmp x0, x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection18: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB3_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB3_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection19: +; CHECK-NOLSE-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection20: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB3_1 +; CHECK-NOLSE-O0-NEXT: .LBB3_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap_64: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB3_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection16: +; CHECK-LDAPR-O1-NEXT: ldxr x8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection17: +; CHECK-LDAPR-O1-NEXT: cmp x8, x1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection18: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB3_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB3_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection19: +; CHECK-LDAPR-O1-NEXT: stxr w9, x2, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB3_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB3_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection20: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap_64: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection16: +; CHECK-LDAPR-O0-NEXT: ldaxr x0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection17: +; CHECK-LDAPR-O0-NEXT: cmp x0, x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection18: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB3_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB3_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection19: +; CHECK-LDAPR-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection20: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB3_1 +; CHECK-LDAPR-O0-NEXT: .LBB3_3: +; CHECK-LDAPR-O0-NEXT: ret + %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic, !pcsections !0 + %val = extractvalue { i64, i1 } %pair, 0 + ret i64 %val +} + +define i64 @val_compare_and_swap_64_monotonic_seqcst(i64* %p, i64 %cmp, i64 %new) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB4_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection21: +; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection22: +; CHECK-NOLSE-O1-NEXT: cmp x8, x1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection23: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB4_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB4_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection24: +; CHECK-NOLSE-O1-NEXT: stlxr w9, x2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB4_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB4_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection25: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection21: +; CHECK-NOLSE-O0-NEXT: ldaxr x0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection22: +; CHECK-NOLSE-O0-NEXT: cmp x0, x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection23: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB4_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection24: +; CHECK-NOLSE-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection25: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB4_1 +; CHECK-NOLSE-O0-NEXT: .LBB4_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB4_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection21: +; CHECK-LDAPR-O1-NEXT: ldaxr x8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection22: +; CHECK-LDAPR-O1-NEXT: cmp x8, x1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection23: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB4_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB4_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection24: +; CHECK-LDAPR-O1-NEXT: stlxr w9, x2, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB4_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB4_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection25: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection21: +; CHECK-LDAPR-O0-NEXT: ldaxr x0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection22: +; CHECK-LDAPR-O0-NEXT: cmp x0, x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection23: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB4_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection24: +; CHECK-LDAPR-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection25: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB4_1 +; CHECK-LDAPR-O0-NEXT: .LBB4_3: +; CHECK-LDAPR-O0-NEXT: ret + %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic seq_cst, !pcsections !0 + %val = extractvalue { i64, i1 } %pair, 0 + ret i64 %val +} + +define i64 @val_compare_and_swap_64_release_acquire(i64* %p, i64 %cmp, i64 %new) { +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_release_acquire: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB5_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection26: +; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection27: +; CHECK-NOLSE-O1-NEXT: cmp x8, x1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection28: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB5_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB5_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection29: +; CHECK-NOLSE-O1-NEXT: stlxr w9, x2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB5_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB5_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: .Lpcsection30: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_release_acquire: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB5_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection26: +; CHECK-NOLSE-O0-NEXT: ldaxr x0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection27: +; CHECK-NOLSE-O0-NEXT: cmp x0, x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection28: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB5_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB5_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection29: +; CHECK-NOLSE-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection30: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB5_1 +; CHECK-NOLSE-O0-NEXT: .LBB5_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: val_compare_and_swap_64_release_acquire: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB5_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection26: +; CHECK-LDAPR-O1-NEXT: ldaxr x8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection27: +; CHECK-LDAPR-O1-NEXT: cmp x8, x1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection28: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB5_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB5_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection29: +; CHECK-LDAPR-O1-NEXT: stlxr w9, x2, [x0] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB5_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB5_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: .Lpcsection30: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: val_compare_and_swap_64_release_acquire: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB5_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection26: +; CHECK-LDAPR-O0-NEXT: ldaxr x0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection27: +; CHECK-LDAPR-O0-NEXT: cmp x0, x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection28: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB5_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB5_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection29: +; CHECK-LDAPR-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection30: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB5_1 +; CHECK-LDAPR-O0-NEXT: .LBB5_3: +; CHECK-LDAPR-O0-NEXT: ret + %pair = cmpxchg i64* %p, i64 %cmp, i64 %new release acquire, !pcsections !0 + %val = extractvalue { i64, i1 } %pair, 0 + ret i64 %val +} + +define i32 @fetch_and_nand(i32* %p) { +; CHECK-NOLSE-O1-LABEL: fetch_and_nand: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection31: +; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection32: +; CHECK-NOLSE-O1-NEXT: and w9, w8, #0x7 +; CHECK-NOLSE-O1-NEXT: .Lpcsection33: +; CHECK-NOLSE-O1-NEXT: mvn w9, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection34: +; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection35: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB6_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_nand: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection31: +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection32: +; CHECK-NOLSE-O0-NEXT: b .LBB6_1 +; CHECK-NOLSE-O0-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB6_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection33: +; CHECK-NOLSE-O0-NEXT: and w9, w8, #0x7 +; CHECK-NOLSE-O0-NEXT: .Lpcsection34: +; CHECK-NOLSE-O0-NEXT: mvn w12, w9 +; CHECK-NOLSE-O0-NEXT: .LBB6_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB6_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection35: +; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection36: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection37: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB6_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB6_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection38: +; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection39: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB6_2 +; CHECK-NOLSE-O0-NEXT: .LBB6_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB6_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection40: +; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection41: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection42: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB6_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection43: +; CHECK-NOLSE-O0-NEXT: b .LBB6_5 +; CHECK-NOLSE-O0-NEXT: .LBB6_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: fetch_and_nand: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection31: +; CHECK-LDAPR-O1-NEXT: ldxr w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection32: +; CHECK-LDAPR-O1-NEXT: and w9, w8, #0x7 +; CHECK-LDAPR-O1-NEXT: .Lpcsection33: +; CHECK-LDAPR-O1-NEXT: mvn w9, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection34: +; CHECK-LDAPR-O1-NEXT: stlxr w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection35: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB6_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: fetch_and_nand: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection31: +; CHECK-LDAPR-O0-NEXT: ldr w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection32: +; CHECK-LDAPR-O0-NEXT: b .LBB6_1 +; CHECK-LDAPR-O0-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB6_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection33: +; CHECK-LDAPR-O0-NEXT: and w9, w8, #0x7 +; CHECK-LDAPR-O0-NEXT: .Lpcsection34: +; CHECK-LDAPR-O0-NEXT: mvn w12, w9 +; CHECK-LDAPR-O0-NEXT: .LBB6_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB6_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection35: +; CHECK-LDAPR-O0-NEXT: ldaxr w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection36: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection37: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB6_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB6_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection38: +; CHECK-LDAPR-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection39: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB6_2 +; CHECK-LDAPR-O0-NEXT: .LBB6_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB6_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection40: +; CHECK-LDAPR-O0-NEXT: subs w8, w9, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection41: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection42: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB6_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection43: +; CHECK-LDAPR-O0-NEXT: b .LBB6_5 +; CHECK-LDAPR-O0-NEXT: .LBB6_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %val = atomicrmw nand i32* %p, i32 7 release, !pcsections !0 + ret i32 %val +} + +define i64 @fetch_and_nand_64(i64* %p) { +; CHECK-NOLSE-O1-LABEL: fetch_and_nand_64: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection36: +; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection37: +; CHECK-NOLSE-O1-NEXT: and x9, x8, #0x7 +; CHECK-NOLSE-O1-NEXT: .Lpcsection38: +; CHECK-NOLSE-O1-NEXT: mvn x9, x9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection39: +; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection40: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB7_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection44: +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection45: +; CHECK-NOLSE-O0-NEXT: b .LBB7_1 +; CHECK-NOLSE-O0-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB7_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection46: +; CHECK-NOLSE-O0-NEXT: and x9, x8, #0x7 +; CHECK-NOLSE-O0-NEXT: .Lpcsection47: +; CHECK-NOLSE-O0-NEXT: mvn x12, x9 +; CHECK-NOLSE-O0-NEXT: .LBB7_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB7_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection48: +; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection49: +; CHECK-NOLSE-O0-NEXT: cmp x9, x8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection50: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB7_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB7_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection51: +; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection52: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB7_2 +; CHECK-NOLSE-O0-NEXT: .LBB7_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB7_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection53: +; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection54: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection55: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB7_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection56: +; CHECK-NOLSE-O0-NEXT: b .LBB7_5 +; CHECK-NOLSE-O0-NEXT: .LBB7_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: fetch_and_nand_64: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection36: +; CHECK-LDAPR-O1-NEXT: ldaxr x8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection37: +; CHECK-LDAPR-O1-NEXT: and x9, x8, #0x7 +; CHECK-LDAPR-O1-NEXT: .Lpcsection38: +; CHECK-LDAPR-O1-NEXT: mvn x9, x9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection39: +; CHECK-LDAPR-O1-NEXT: stlxr w10, x9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection40: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB7_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: fetch_and_nand_64: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection44: +; CHECK-LDAPR-O0-NEXT: ldr x8, [x0] +; CHECK-LDAPR-O0-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection45: +; CHECK-LDAPR-O0-NEXT: b .LBB7_1 +; CHECK-LDAPR-O0-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB7_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection46: +; CHECK-LDAPR-O0-NEXT: and x9, x8, #0x7 +; CHECK-LDAPR-O0-NEXT: .Lpcsection47: +; CHECK-LDAPR-O0-NEXT: mvn x12, x9 +; CHECK-LDAPR-O0-NEXT: .LBB7_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB7_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection48: +; CHECK-LDAPR-O0-NEXT: ldaxr x9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection49: +; CHECK-LDAPR-O0-NEXT: cmp x9, x8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection50: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB7_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB7_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection51: +; CHECK-LDAPR-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection52: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB7_2 +; CHECK-LDAPR-O0-NEXT: .LBB7_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB7_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection53: +; CHECK-LDAPR-O0-NEXT: subs x8, x9, x8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection54: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection55: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB7_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection56: +; CHECK-LDAPR-O0-NEXT: b .LBB7_5 +; CHECK-LDAPR-O0-NEXT: .LBB7_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %val = atomicrmw nand i64* %p, i64 7 acq_rel, !pcsections !0 + ret i64 %val +} + +define i32 @fetch_and_or(i32* %p) { +; CHECK-NOLSE-O1-LABEL: fetch_and_or: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: mov w9, #5 +; CHECK-NOLSE-O1-NEXT: .LBB8_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection41: +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection42: +; CHECK-NOLSE-O1-NEXT: orr w10, w8, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection43: +; CHECK-NOLSE-O1-NEXT: stlxr w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection44: +; CHECK-NOLSE-O1-NEXT: cbnz w11, .LBB8_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_or: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection57: +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection58: +; CHECK-NOLSE-O0-NEXT: b .LBB8_1 +; CHECK-NOLSE-O0-NEXT: .LBB8_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB8_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: mov w9, #5 +; CHECK-NOLSE-O0-NEXT: .Lpcsection59: +; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: .LBB8_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB8_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection60: +; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection61: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection62: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB8_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB8_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection63: +; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection64: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB8_2 +; CHECK-NOLSE-O0-NEXT: .LBB8_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB8_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection65: +; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection66: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection67: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB8_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection68: +; CHECK-NOLSE-O0-NEXT: b .LBB8_5 +; CHECK-NOLSE-O0-NEXT: .LBB8_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: fetch_and_or: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: mov w9, #5 +; CHECK-LDAPR-O1-NEXT: .LBB8_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection41: +; CHECK-LDAPR-O1-NEXT: ldaxr w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection42: +; CHECK-LDAPR-O1-NEXT: orr w10, w8, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection43: +; CHECK-LDAPR-O1-NEXT: stlxr w11, w10, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection44: +; CHECK-LDAPR-O1-NEXT: cbnz w11, .LBB8_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: fetch_and_or: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection57: +; CHECK-LDAPR-O0-NEXT: ldr w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection58: +; CHECK-LDAPR-O0-NEXT: b .LBB8_1 +; CHECK-LDAPR-O0-NEXT: .LBB8_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB8_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: mov w9, #5 +; CHECK-LDAPR-O0-NEXT: .Lpcsection59: +; CHECK-LDAPR-O0-NEXT: orr w12, w8, w9 +; CHECK-LDAPR-O0-NEXT: .LBB8_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB8_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection60: +; CHECK-LDAPR-O0-NEXT: ldaxr w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection61: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection62: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB8_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB8_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection63: +; CHECK-LDAPR-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection64: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB8_2 +; CHECK-LDAPR-O0-NEXT: .LBB8_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB8_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection65: +; CHECK-LDAPR-O0-NEXT: subs w8, w9, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection66: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection67: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB8_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection68: +; CHECK-LDAPR-O0-NEXT: b .LBB8_5 +; CHECK-LDAPR-O0-NEXT: .LBB8_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %val = atomicrmw or i32* %p, i32 5 seq_cst, !pcsections !0 + ret i32 %val +} + +define i64 @fetch_and_or_64(i64* %p) { +; CHECK-NOLSE-O1-LABEL: fetch_and_or_64: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB9_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection45: +; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection46: +; CHECK-NOLSE-O1-NEXT: orr x9, x8, #0x7 +; CHECK-NOLSE-O1-NEXT: .Lpcsection47: +; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection48: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB9_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_or_64: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection69: +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection70: +; CHECK-NOLSE-O0-NEXT: b .LBB9_1 +; CHECK-NOLSE-O0-NEXT: .LBB9_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB9_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection71: +; CHECK-NOLSE-O0-NEXT: orr x12, x8, #0x7 +; CHECK-NOLSE-O0-NEXT: .LBB9_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB9_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection72: +; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection73: +; CHECK-NOLSE-O0-NEXT: cmp x9, x8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection74: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB9_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB9_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection75: +; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection76: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB9_2 +; CHECK-NOLSE-O0-NEXT: .LBB9_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB9_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection77: +; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection78: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection79: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB9_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection80: +; CHECK-NOLSE-O0-NEXT: b .LBB9_5 +; CHECK-NOLSE-O0-NEXT: .LBB9_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: fetch_and_or_64: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB9_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection45: +; CHECK-LDAPR-O1-NEXT: ldxr x8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection46: +; CHECK-LDAPR-O1-NEXT: orr x9, x8, #0x7 +; CHECK-LDAPR-O1-NEXT: .Lpcsection47: +; CHECK-LDAPR-O1-NEXT: stxr w10, x9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection48: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB9_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov x0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: fetch_and_or_64: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection69: +; CHECK-LDAPR-O0-NEXT: ldr x8, [x0] +; CHECK-LDAPR-O0-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection70: +; CHECK-LDAPR-O0-NEXT: b .LBB9_1 +; CHECK-LDAPR-O0-NEXT: .LBB9_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB9_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection71: +; CHECK-LDAPR-O0-NEXT: orr x12, x8, #0x7 +; CHECK-LDAPR-O0-NEXT: .LBB9_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB9_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection72: +; CHECK-LDAPR-O0-NEXT: ldaxr x9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection73: +; CHECK-LDAPR-O0-NEXT: cmp x9, x8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection74: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB9_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB9_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection75: +; CHECK-LDAPR-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection76: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB9_2 +; CHECK-LDAPR-O0-NEXT: .LBB9_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB9_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection77: +; CHECK-LDAPR-O0-NEXT: subs x8, x9, x8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection78: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection79: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB9_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection80: +; CHECK-LDAPR-O0-NEXT: b .LBB9_5 +; CHECK-LDAPR-O0-NEXT: .LBB9_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %val = atomicrmw or i64* %p, i64 7 monotonic, !pcsections !0 + ret i64 %val +} + +define void @acquire_fence() { +; CHECK-NOLSE-O1-LABEL: acquire_fence: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection49: +; CHECK-NOLSE-O1-NEXT: dmb ishld +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: acquire_fence: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection81: +; CHECK-NOLSE-O0-NEXT: dmb ishld +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: acquire_fence: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection49: +; CHECK-LDAPR-O1-NEXT: dmb ishld +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: acquire_fence: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection81: +; CHECK-LDAPR-O0-NEXT: dmb ishld +; CHECK-LDAPR-O0-NEXT: ret + fence acquire, !pcsections !0 + ret void +} + +define void @release_fence() { +; CHECK-NOLSE-O1-LABEL: release_fence: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection50: +; CHECK-NOLSE-O1-NEXT: dmb ish +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: release_fence: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection82: +; CHECK-NOLSE-O0-NEXT: dmb ish +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: release_fence: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection50: +; CHECK-LDAPR-O1-NEXT: dmb ish +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: release_fence: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection82: +; CHECK-LDAPR-O0-NEXT: dmb ish +; CHECK-LDAPR-O0-NEXT: ret + fence release, !pcsections !0 + ret void +} + +define void @seq_cst_fence() { +; CHECK-NOLSE-O1-LABEL: seq_cst_fence: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection51: +; CHECK-NOLSE-O1-NEXT: dmb ish +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: seq_cst_fence: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection83: +; CHECK-NOLSE-O0-NEXT: dmb ish +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: seq_cst_fence: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection51: +; CHECK-LDAPR-O1-NEXT: dmb ish +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: seq_cst_fence: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection83: +; CHECK-LDAPR-O0-NEXT: dmb ish +; CHECK-LDAPR-O0-NEXT: ret + fence seq_cst, !pcsections !0 + ret void +} + +define i32 @atomic_load(i32* %p) { +; CHECK-NOLSE-O1-LABEL: atomic_load: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection52: +; CHECK-NOLSE-O1-NEXT: ldar w0, [x0] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection84: +; CHECK-NOLSE-O0-NEXT: ldar w0, [x0] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_load: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection52: +; CHECK-LDAPR-O1-NEXT: ldar w0, [x0] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_load: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection84: +; CHECK-LDAPR-O0-NEXT: ldar w0, [x0] +; CHECK-LDAPR-O0-NEXT: ret + %r = load atomic i32, i32* %p seq_cst, align 4, !pcsections !0 + ret i32 %r +} + +define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) { +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection53: +; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, #4095] +; CHECK-NOLSE-O1-NEXT: .Lpcsection54: +; CHECK-NOLSE-O1-NEXT: ldrb w10, [x0, w1, sxtw] +; CHECK-NOLSE-O1-NEXT: .Lpcsection55: +; CHECK-NOLSE-O1-NEXT: ldurb w11, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection56: +; CHECK-NOLSE-O1-NEXT: ldrb w8, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection57: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 +; CHECK-NOLSE-O1-NEXT: .Lpcsection58: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 +; CHECK-NOLSE-O1-NEXT: .Lpcsection59: +; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection85: +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x0, #4095] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw +; CHECK-NOLSE-O0-NEXT: .Lpcsection86: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x8] +; CHECK-NOLSE-O0-NEXT: .Lpcsection87: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256 +; CHECK-NOLSE-O0-NEXT: .Lpcsection88: +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection89: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection90: +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection91: +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection53: +; CHECK-LDAPR-O1-NEXT: ldrb w9, [x0, #4095] +; CHECK-LDAPR-O1-NEXT: .Lpcsection54: +; CHECK-LDAPR-O1-NEXT: ldrb w10, [x0, w1, sxtw] +; CHECK-LDAPR-O1-NEXT: .Lpcsection55: +; CHECK-LDAPR-O1-NEXT: ldurb w11, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection56: +; CHECK-LDAPR-O1-NEXT: ldrb w8, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection57: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 +; CHECK-LDAPR-O1-NEXT: .Lpcsection58: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 +; CHECK-LDAPR-O1-NEXT: .Lpcsection59: +; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection85: +; CHECK-LDAPR-O0-NEXT: ldrb w9, [x0, #4095] +; CHECK-LDAPR-O0-NEXT: add x8, x0, w1, sxtw +; CHECK-LDAPR-O0-NEXT: .Lpcsection86: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x8] +; CHECK-LDAPR-O0-NEXT: .Lpcsection87: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-LDAPR-O0-NEXT: subs x9, x0, #256 +; CHECK-LDAPR-O0-NEXT: .Lpcsection88: +; CHECK-LDAPR-O0-NEXT: ldrb w9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection89: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-LDAPR-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection90: +; CHECK-LDAPR-O0-NEXT: ldrb w9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection91: +; CHECK-LDAPR-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 + %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1, !pcsections !0 + + %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 + %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1, !pcsections !0 + %tot1 = add i8 %val_unsigned, %val_regoff, !pcsections !0 + + %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 + %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1, !pcsections !0 + %tot2 = add i8 %tot1, %val_unscaled, !pcsections !0 + + %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + %val_random = load atomic i8, i8* %ptr_random unordered, align 1, !pcsections !0 + %tot3 = add i8 %tot2, %val_random, !pcsections !0 + + ret i8 %tot3 +} + +define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) { +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection60: +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, #8190] +; CHECK-NOLSE-O1-NEXT: .Lpcsection61: +; CHECK-NOLSE-O1-NEXT: ldrh w10, [x0, w1, sxtw #1] +; CHECK-NOLSE-O1-NEXT: .Lpcsection62: +; CHECK-NOLSE-O1-NEXT: ldurh w11, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection63: +; CHECK-NOLSE-O1-NEXT: ldrh w8, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection64: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 +; CHECK-NOLSE-O1-NEXT: .Lpcsection65: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 +; CHECK-NOLSE-O1-NEXT: .Lpcsection66: +; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection92: +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x0, #8190] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection93: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x8] +; CHECK-NOLSE-O0-NEXT: .Lpcsection94: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256 +; CHECK-NOLSE-O0-NEXT: .Lpcsection95: +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection96: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection97: +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection98: +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection60: +; CHECK-LDAPR-O1-NEXT: ldrh w9, [x0, #8190] +; CHECK-LDAPR-O1-NEXT: .Lpcsection61: +; CHECK-LDAPR-O1-NEXT: ldrh w10, [x0, w1, sxtw #1] +; CHECK-LDAPR-O1-NEXT: .Lpcsection62: +; CHECK-LDAPR-O1-NEXT: ldurh w11, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection63: +; CHECK-LDAPR-O1-NEXT: ldrh w8, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection64: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 +; CHECK-LDAPR-O1-NEXT: .Lpcsection65: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 +; CHECK-LDAPR-O1-NEXT: .Lpcsection66: +; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection92: +; CHECK-LDAPR-O0-NEXT: ldrh w9, [x0, #8190] +; CHECK-LDAPR-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection93: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x8] +; CHECK-LDAPR-O0-NEXT: .Lpcsection94: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: subs x9, x0, #256 +; CHECK-LDAPR-O0-NEXT: .Lpcsection95: +; CHECK-LDAPR-O0-NEXT: ldrh w9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection96: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection97: +; CHECK-LDAPR-O0-NEXT: ldrh w9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection98: +; CHECK-LDAPR-O0-NEXT: add w0, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 + %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2, !pcsections !0 + + %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 + %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2, !pcsections !0 + %tot1 = add i16 %val_unsigned, %val_regoff, !pcsections !0 + + %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 + %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2, !pcsections !0 + %tot2 = add i16 %tot1, %val_unscaled, !pcsections !0 + + %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + %val_random = load atomic i16, i16* %ptr_random unordered, align 2, !pcsections !0 + %tot3 = add i16 %tot2, %val_random, !pcsections !0 + + ret i16 %tot3 +} + +define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) { +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection67: +; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, #16380] +; CHECK-NOLSE-O1-NEXT: .Lpcsection68: +; CHECK-NOLSE-O1-NEXT: ldr w10, [x0, w1, sxtw #2] +; CHECK-NOLSE-O1-NEXT: .Lpcsection69: +; CHECK-NOLSE-O1-NEXT: ldur w11, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection70: +; CHECK-NOLSE-O1-NEXT: ldr w8, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection71: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 +; CHECK-NOLSE-O1-NEXT: .Lpcsection72: +; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 +; CHECK-NOLSE-O1-NEXT: .Lpcsection73: +; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection99: +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0, #16380] +; CHECK-NOLSE-O0-NEXT: .Lpcsection100: +; CHECK-NOLSE-O0-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-NOLSE-O0-NEXT: .Lpcsection101: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O0-NEXT: .Lpcsection102: +; CHECK-NOLSE-O0-NEXT: ldur w9, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: .Lpcsection103: +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection104: +; CHECK-NOLSE-O0-NEXT: ldr w9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection105: +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_32: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection67: +; CHECK-LDAPR-O1-NEXT: ldr w9, [x0, #16380] +; CHECK-LDAPR-O1-NEXT: .Lpcsection68: +; CHECK-LDAPR-O1-NEXT: ldr w10, [x0, w1, sxtw #2] +; CHECK-LDAPR-O1-NEXT: .Lpcsection69: +; CHECK-LDAPR-O1-NEXT: ldur w11, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection70: +; CHECK-LDAPR-O1-NEXT: ldr w8, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection71: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 +; CHECK-LDAPR-O1-NEXT: .Lpcsection72: +; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 +; CHECK-LDAPR-O1-NEXT: .Lpcsection73: +; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_32: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection99: +; CHECK-LDAPR-O0-NEXT: ldr w8, [x0, #16380] +; CHECK-LDAPR-O0-NEXT: .Lpcsection100: +; CHECK-LDAPR-O0-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-LDAPR-O0-NEXT: .Lpcsection101: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9 +; CHECK-LDAPR-O0-NEXT: .Lpcsection102: +; CHECK-LDAPR-O0-NEXT: ldur w9, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: .Lpcsection103: +; CHECK-LDAPR-O0-NEXT: add w8, w8, w9 +; CHECK-LDAPR-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection104: +; CHECK-LDAPR-O0-NEXT: ldr w9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection105: +; CHECK-LDAPR-O0-NEXT: add w0, w8, w9 +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 + %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4, !pcsections !0 + + %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 + %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4, !pcsections !0 + %tot1 = add i32 %val_unsigned, %val_regoff, !pcsections !0 + + %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 + %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4, !pcsections !0 + %tot2 = add i32 %tot1, %val_unscaled, !pcsections !0 + + %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + %val_random = load atomic i32, i32* %ptr_random unordered, align 4, !pcsections !0 + %tot3 = add i32 %tot2, %val_random, !pcsections !0 + + ret i32 %tot3 +} + +define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) { +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection74: +; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, #32760] +; CHECK-NOLSE-O1-NEXT: .Lpcsection75: +; CHECK-NOLSE-O1-NEXT: ldr x10, [x0, w1, sxtw #3] +; CHECK-NOLSE-O1-NEXT: .Lpcsection76: +; CHECK-NOLSE-O1-NEXT: ldur x11, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection77: +; CHECK-NOLSE-O1-NEXT: ldr x8, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection78: +; CHECK-NOLSE-O1-NEXT: add x9, x9, x10 +; CHECK-NOLSE-O1-NEXT: .Lpcsection79: +; CHECK-NOLSE-O1-NEXT: add x9, x9, x11 +; CHECK-NOLSE-O1-NEXT: .Lpcsection80: +; CHECK-NOLSE-O1-NEXT: add x0, x9, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection106: +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0, #32760] +; CHECK-NOLSE-O0-NEXT: .Lpcsection107: +; CHECK-NOLSE-O0-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-NOLSE-O0-NEXT: .Lpcsection108: +; CHECK-NOLSE-O0-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O0-NEXT: .Lpcsection109: +; CHECK-NOLSE-O0-NEXT: ldur x9, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: .Lpcsection110: +; CHECK-NOLSE-O0-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection111: +; CHECK-NOLSE-O0-NEXT: ldr x9, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection112: +; CHECK-NOLSE-O0-NEXT: add x0, x8, x9 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_64: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection74: +; CHECK-LDAPR-O1-NEXT: ldr x9, [x0, #32760] +; CHECK-LDAPR-O1-NEXT: .Lpcsection75: +; CHECK-LDAPR-O1-NEXT: ldr x10, [x0, w1, sxtw #3] +; CHECK-LDAPR-O1-NEXT: .Lpcsection76: +; CHECK-LDAPR-O1-NEXT: ldur x11, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection77: +; CHECK-LDAPR-O1-NEXT: ldr x8, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection78: +; CHECK-LDAPR-O1-NEXT: add x9, x9, x10 +; CHECK-LDAPR-O1-NEXT: .Lpcsection79: +; CHECK-LDAPR-O1-NEXT: add x9, x9, x11 +; CHECK-LDAPR-O1-NEXT: .Lpcsection80: +; CHECK-LDAPR-O1-NEXT: add x0, x9, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_64: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection106: +; CHECK-LDAPR-O0-NEXT: ldr x8, [x0, #32760] +; CHECK-LDAPR-O0-NEXT: .Lpcsection107: +; CHECK-LDAPR-O0-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-LDAPR-O0-NEXT: .Lpcsection108: +; CHECK-LDAPR-O0-NEXT: add x8, x8, x9 +; CHECK-LDAPR-O0-NEXT: .Lpcsection109: +; CHECK-LDAPR-O0-NEXT: ldur x9, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: .Lpcsection110: +; CHECK-LDAPR-O0-NEXT: add x8, x8, x9 +; CHECK-LDAPR-O0-NEXT: add x9, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection111: +; CHECK-LDAPR-O0-NEXT: ldr x9, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection112: +; CHECK-LDAPR-O0-NEXT: add x0, x8, x9 +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 + %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8, !pcsections !0 + + %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 + %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8, !pcsections !0 + %tot1 = add i64 %val_unsigned, %val_regoff, !pcsections !0 + + %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 + %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8, !pcsections !0 + %tot2 = add i64 %tot1, %val_unscaled, !pcsections !0 + + %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + %val_random = load atomic i64, i64* %ptr_random unordered, align 8, !pcsections !0 + %tot3 = add i64 %tot2, %val_random, !pcsections !0 + + ret i64 %tot3 +} + + +define void @atomc_store(i32* %p) { +; CHECK-NOLSE-O1-LABEL: atomc_store: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: mov w8, #4 +; CHECK-NOLSE-O1-NEXT: .Lpcsection81: +; CHECK-NOLSE-O1-NEXT: stlr w8, [x0] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomc_store: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov w8, #4 +; CHECK-NOLSE-O0-NEXT: .Lpcsection113: +; CHECK-NOLSE-O0-NEXT: stlr w8, [x0] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomc_store: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: mov w8, #4 +; CHECK-LDAPR-O1-NEXT: .Lpcsection81: +; CHECK-LDAPR-O1-NEXT: stlr w8, [x0] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomc_store: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov w8, #4 +; CHECK-LDAPR-O0-NEXT: .Lpcsection113: +; CHECK-LDAPR-O0-NEXT: stlr w8, [x0] +; CHECK-LDAPR-O0-NEXT: ret + store atomic i32 4, i32* %p seq_cst, align 4, !pcsections !0 + ret void +} + +define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) { +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection82: +; CHECK-NOLSE-O1-NEXT: strb w2, [x0, #4095] +; CHECK-NOLSE-O1-NEXT: .Lpcsection83: +; CHECK-NOLSE-O1-NEXT: strb w2, [x0, w1, sxtw] +; CHECK-NOLSE-O1-NEXT: .Lpcsection84: +; CHECK-NOLSE-O1-NEXT: sturb w2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection85: +; CHECK-NOLSE-O1-NEXT: strb w2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection114: +; CHECK-NOLSE-O0-NEXT: strb w2, [x0, #4095] +; CHECK-NOLSE-O0-NEXT: .Lpcsection115: +; CHECK-NOLSE-O0-NEXT: strb w2, [x0, w1, sxtw] +; CHECK-NOLSE-O0-NEXT: .Lpcsection116: +; CHECK-NOLSE-O0-NEXT: sturb w2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection117: +; CHECK-NOLSE-O0-NEXT: strb w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_store_relaxed_8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection82: +; CHECK-LDAPR-O1-NEXT: strb w2, [x0, #4095] +; CHECK-LDAPR-O1-NEXT: .Lpcsection83: +; CHECK-LDAPR-O1-NEXT: strb w2, [x0, w1, sxtw] +; CHECK-LDAPR-O1-NEXT: .Lpcsection84: +; CHECK-LDAPR-O1-NEXT: sturb w2, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection85: +; CHECK-LDAPR-O1-NEXT: strb w2, [x8] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_store_relaxed_8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection114: +; CHECK-LDAPR-O0-NEXT: strb w2, [x0, #4095] +; CHECK-LDAPR-O0-NEXT: .Lpcsection115: +; CHECK-LDAPR-O0-NEXT: strb w2, [x0, w1, sxtw] +; CHECK-LDAPR-O0-NEXT: .Lpcsection116: +; CHECK-LDAPR-O0-NEXT: sturb w2, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection117: +; CHECK-LDAPR-O0-NEXT: strb w2, [x8] +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 + store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1, !pcsections !0 + + %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 + store atomic i8 %val, i8* %ptr_regoff unordered, align 1, !pcsections !0 + + %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 + store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1, !pcsections !0 + + %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + store atomic i8 %val, i8* %ptr_random unordered, align 1, !pcsections !0 + + ret void +} + +define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) { +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection86: +; CHECK-NOLSE-O1-NEXT: strh w2, [x0, #8190] +; CHECK-NOLSE-O1-NEXT: .Lpcsection87: +; CHECK-NOLSE-O1-NEXT: strh w2, [x0, w1, sxtw #1] +; CHECK-NOLSE-O1-NEXT: .Lpcsection88: +; CHECK-NOLSE-O1-NEXT: sturh w2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection89: +; CHECK-NOLSE-O1-NEXT: strh w2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection118: +; CHECK-NOLSE-O0-NEXT: strh w2, [x0, #8190] +; CHECK-NOLSE-O0-NEXT: .Lpcsection119: +; CHECK-NOLSE-O0-NEXT: strh w2, [x0, w1, sxtw #1] +; CHECK-NOLSE-O0-NEXT: .Lpcsection120: +; CHECK-NOLSE-O0-NEXT: sturh w2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection121: +; CHECK-NOLSE-O0-NEXT: strh w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_store_relaxed_16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection86: +; CHECK-LDAPR-O1-NEXT: strh w2, [x0, #8190] +; CHECK-LDAPR-O1-NEXT: .Lpcsection87: +; CHECK-LDAPR-O1-NEXT: strh w2, [x0, w1, sxtw #1] +; CHECK-LDAPR-O1-NEXT: .Lpcsection88: +; CHECK-LDAPR-O1-NEXT: sturh w2, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection89: +; CHECK-LDAPR-O1-NEXT: strh w2, [x8] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_store_relaxed_16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection118: +; CHECK-LDAPR-O0-NEXT: strh w2, [x0, #8190] +; CHECK-LDAPR-O0-NEXT: .Lpcsection119: +; CHECK-LDAPR-O0-NEXT: strh w2, [x0, w1, sxtw #1] +; CHECK-LDAPR-O0-NEXT: .Lpcsection120: +; CHECK-LDAPR-O0-NEXT: sturh w2, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection121: +; CHECK-LDAPR-O0-NEXT: strh w2, [x8] +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 + store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2, !pcsections !0 + + %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 + store atomic i16 %val, i16* %ptr_regoff unordered, align 2, !pcsections !0 + + %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 + store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2, !pcsections !0 + + %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + store atomic i16 %val, i16* %ptr_random unordered, align 2, !pcsections !0 + + ret void +} + +define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) { +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_32: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection90: +; CHECK-NOLSE-O1-NEXT: str w2, [x0, #16380] +; CHECK-NOLSE-O1-NEXT: .Lpcsection91: +; CHECK-NOLSE-O1-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-NOLSE-O1-NEXT: .Lpcsection92: +; CHECK-NOLSE-O1-NEXT: stur w2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection93: +; CHECK-NOLSE-O1-NEXT: str w2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_32: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection122: +; CHECK-NOLSE-O0-NEXT: str w2, [x0, #16380] +; CHECK-NOLSE-O0-NEXT: .Lpcsection123: +; CHECK-NOLSE-O0-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-NOLSE-O0-NEXT: .Lpcsection124: +; CHECK-NOLSE-O0-NEXT: stur w2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection125: +; CHECK-NOLSE-O0-NEXT: str w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_store_relaxed_32: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection90: +; CHECK-LDAPR-O1-NEXT: str w2, [x0, #16380] +; CHECK-LDAPR-O1-NEXT: .Lpcsection91: +; CHECK-LDAPR-O1-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-LDAPR-O1-NEXT: .Lpcsection92: +; CHECK-LDAPR-O1-NEXT: stur w2, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection93: +; CHECK-LDAPR-O1-NEXT: str w2, [x8] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_store_relaxed_32: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection122: +; CHECK-LDAPR-O0-NEXT: str w2, [x0, #16380] +; CHECK-LDAPR-O0-NEXT: .Lpcsection123: +; CHECK-LDAPR-O0-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-LDAPR-O0-NEXT: .Lpcsection124: +; CHECK-LDAPR-O0-NEXT: stur w2, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection125: +; CHECK-LDAPR-O0-NEXT: str w2, [x8] +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 + store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4, !pcsections !0 + + %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 + store atomic i32 %val, i32* %ptr_regoff unordered, align 4, !pcsections !0 + + %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 + store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4, !pcsections !0 + + %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + store atomic i32 %val, i32* %ptr_random unordered, align 4, !pcsections !0 + + ret void +} + +define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) { +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_64: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O1-NEXT: .Lpcsection94: +; CHECK-NOLSE-O1-NEXT: str x2, [x0, #32760] +; CHECK-NOLSE-O1-NEXT: .Lpcsection95: +; CHECK-NOLSE-O1-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-NOLSE-O1-NEXT: .Lpcsection96: +; CHECK-NOLSE-O1-NEXT: stur x2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: .Lpcsection97: +; CHECK-NOLSE-O1-NEXT: str x2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_64: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection126: +; CHECK-NOLSE-O0-NEXT: str x2, [x0, #32760] +; CHECK-NOLSE-O0-NEXT: .Lpcsection127: +; CHECK-NOLSE-O0-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-NOLSE-O0-NEXT: .Lpcsection128: +; CHECK-NOLSE-O0-NEXT: stur x2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-NOLSE-O0-NEXT: .Lpcsection129: +; CHECK-NOLSE-O0-NEXT: str x2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomic_store_relaxed_64: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O1-NEXT: .Lpcsection94: +; CHECK-LDAPR-O1-NEXT: str x2, [x0, #32760] +; CHECK-LDAPR-O1-NEXT: .Lpcsection95: +; CHECK-LDAPR-O1-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-LDAPR-O1-NEXT: .Lpcsection96: +; CHECK-LDAPR-O1-NEXT: stur x2, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: .Lpcsection97: +; CHECK-LDAPR-O1-NEXT: str x2, [x8] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomic_store_relaxed_64: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection126: +; CHECK-LDAPR-O0-NEXT: str x2, [x0, #32760] +; CHECK-LDAPR-O0-NEXT: .Lpcsection127: +; CHECK-LDAPR-O0-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-LDAPR-O0-NEXT: .Lpcsection128: +; CHECK-LDAPR-O0-NEXT: stur x2, [x0, #-256] +; CHECK-LDAPR-O0-NEXT: add x8, x0, #291, lsl #12 // =1191936 +; CHECK-LDAPR-O0-NEXT: .Lpcsection129: +; CHECK-LDAPR-O0-NEXT: str x2, [x8] +; CHECK-LDAPR-O0-NEXT: ret + %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 + store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8, !pcsections !0 + + %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 + store atomic i64 %val, i64* %ptr_regoff unordered, align 8, !pcsections !0 + + %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 + store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8, !pcsections !0 + + %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + store atomic i64 %val, i64* %ptr_random unordered, align 8, !pcsections !0 + + ret void +} + +define i32 @load_zext(i8* %p8, i16* %p16) { +; CHECK-NOLSE-O1-LABEL: load_zext: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection98: +; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection99: +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x1] +; CHECK-NOLSE-O1-NEXT: .Lpcsection100: +; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, uxtb +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: load_zext: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection130: +; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0] +; CHECK-NOLSE-O0-NEXT: .Lpcsection131: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x1] +; CHECK-NOLSE-O0-NEXT: .Lpcsection132: +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: load_zext: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection98: +; CHECK-LDAPR-O1-NEXT: ldaprb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection99: +; CHECK-LDAPR-O1-NEXT: ldrh w9, [x1] +; CHECK-LDAPR-O1-NEXT: .Lpcsection100: +; CHECK-LDAPR-O1-NEXT: add w0, w9, w8, uxtb +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: load_zext: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection130: +; CHECK-LDAPR-O0-NEXT: ldaprb w9, [x0] +; CHECK-LDAPR-O0-NEXT: .Lpcsection131: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x1] +; CHECK-LDAPR-O0-NEXT: .Lpcsection132: +; CHECK-LDAPR-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-LDAPR-O0-NEXT: ret + %val1.8 = load atomic i8, i8* %p8 acquire, align 1, !pcsections !0 + %val1 = zext i8 %val1.8 to i32 + + %val2.16 = load atomic i16, i16* %p16 unordered, align 2, !pcsections !0 + %val2 = zext i16 %val2.16 to i32 + + %res = add i32 %val1, %val2, !pcsections !0 + ret i32 %res +} + +define { i32, i64 } @load_acq(i32* %p32, i64* %p64) { +; CHECK-NOLSE-O1-LABEL: load_acq: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection101: +; CHECK-NOLSE-O1-NEXT: ldar w0, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection102: +; CHECK-NOLSE-O1-NEXT: ldar x1, [x1] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: load_acq: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection133: +; CHECK-NOLSE-O0-NEXT: ldar w0, [x0] +; CHECK-NOLSE-O0-NEXT: .Lpcsection134: +; CHECK-NOLSE-O0-NEXT: ldar x1, [x1] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: load_acq: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection101: +; CHECK-LDAPR-O1-NEXT: ldar w0, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection102: +; CHECK-LDAPR-O1-NEXT: ldapr x1, [x1] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: load_acq: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection133: +; CHECK-LDAPR-O0-NEXT: ldar w0, [x0] +; CHECK-LDAPR-O0-NEXT: .Lpcsection134: +; CHECK-LDAPR-O0-NEXT: ldapr x1, [x1] +; CHECK-LDAPR-O0-NEXT: ret + %val32 = load atomic i32, i32* %p32 seq_cst, align 4, !pcsections !0 + %tmp = insertvalue { i32, i64 } undef, i32 %val32, 0 + + %val64 = load atomic i64, i64* %p64 acquire, align 8, !pcsections !0 + %res = insertvalue { i32, i64 } %tmp, i64 %val64, 1 + + ret { i32, i64 } %res +} + +define i32 @load_sext(i8* %p8, i16* %p16) { +; CHECK-NOLSE-O1-LABEL: load_sext: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection103: +; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection104: +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x1] +; CHECK-NOLSE-O1-NEXT: sxth w9, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection105: +; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, sxtb +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: load_sext: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection135: +; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0] +; CHECK-NOLSE-O0-NEXT: .Lpcsection136: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x1] +; CHECK-NOLSE-O0-NEXT: sxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection137: +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, sxtb +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: load_sext: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection103: +; CHECK-LDAPR-O1-NEXT: ldaprb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection104: +; CHECK-LDAPR-O1-NEXT: ldrh w9, [x1] +; CHECK-LDAPR-O1-NEXT: sxth w9, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection105: +; CHECK-LDAPR-O1-NEXT: add w0, w9, w8, sxtb +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: load_sext: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection135: +; CHECK-LDAPR-O0-NEXT: ldaprb w9, [x0] +; CHECK-LDAPR-O0-NEXT: .Lpcsection136: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x1] +; CHECK-LDAPR-O0-NEXT: sxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection137: +; CHECK-LDAPR-O0-NEXT: add w0, w8, w9, sxtb +; CHECK-LDAPR-O0-NEXT: ret + %val1.8 = load atomic i8, i8* %p8 acquire, align 1, !pcsections !0 + %val1 = sext i8 %val1.8 to i32 + + %val2.16 = load atomic i16, i16* %p16 unordered, align 2, !pcsections !0 + %val2 = sext i16 %val2.16 to i32 + + %res = add i32 %val1, %val2, !pcsections !0 + ret i32 %res +} + +define void @store_trunc(i32 %val, i8* %p8, i16* %p16) { +; CHECK-NOLSE-O1-LABEL: store_trunc: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection106: +; CHECK-NOLSE-O1-NEXT: stlrb w0, [x1] +; CHECK-NOLSE-O1-NEXT: .Lpcsection107: +; CHECK-NOLSE-O1-NEXT: strh w0, [x2] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: store_trunc: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection138: +; CHECK-NOLSE-O0-NEXT: stlrb w0, [x1] +; CHECK-NOLSE-O0-NEXT: .Lpcsection139: +; CHECK-NOLSE-O0-NEXT: strh w0, [x2] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: store_trunc: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection106: +; CHECK-LDAPR-O1-NEXT: stlrb w0, [x1] +; CHECK-LDAPR-O1-NEXT: .Lpcsection107: +; CHECK-LDAPR-O1-NEXT: strh w0, [x2] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: store_trunc: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection138: +; CHECK-LDAPR-O0-NEXT: stlrb w0, [x1] +; CHECK-LDAPR-O0-NEXT: .Lpcsection139: +; CHECK-LDAPR-O0-NEXT: strh w0, [x2] +; CHECK-LDAPR-O0-NEXT: ret + %val8 = trunc i32 %val to i8 + store atomic i8 %val8, i8* %p8 seq_cst, align 1, !pcsections !0 + + %val16 = trunc i32 %val to i16 + store atomic i16 %val16, i16* %p16 monotonic, align 2, !pcsections !0 + + ret void +} + +define i8 @atomicrmw_add_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB27_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection108: +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection109: +; CHECK-NOLSE-O1-NEXT: add w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection110: +; CHECK-NOLSE-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection111: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB27_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection140: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection141: +; CHECK-NOLSE-O0-NEXT: b .LBB27_1 +; CHECK-NOLSE-O0-NEXT: .LBB27_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB27_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection142: +; CHECK-NOLSE-O0-NEXT: add w12, w8, w10, uxth +; CHECK-NOLSE-O0-NEXT: .LBB27_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB27_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection143: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection144: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection145: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB27_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB27_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection146: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection147: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB27_2 +; CHECK-NOLSE-O0-NEXT: .LBB27_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB27_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection148: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection149: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection150: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection151: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB27_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection152: +; CHECK-NOLSE-O0-NEXT: b .LBB27_5 +; CHECK-NOLSE-O0-NEXT: .LBB27_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_add_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB27_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection108: +; CHECK-LDAPR-O1-NEXT: ldaxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection109: +; CHECK-LDAPR-O1-NEXT: add w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection110: +; CHECK-LDAPR-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection111: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB27_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_add_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection140: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection141: +; CHECK-LDAPR-O0-NEXT: b .LBB27_1 +; CHECK-LDAPR-O0-NEXT: .LBB27_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB27_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection142: +; CHECK-LDAPR-O0-NEXT: add w12, w8, w10, uxth +; CHECK-LDAPR-O0-NEXT: .LBB27_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB27_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection143: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection144: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection145: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB27_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB27_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection146: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection147: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB27_2 +; CHECK-LDAPR-O0-NEXT: .LBB27_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB27_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection148: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection149: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection150: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection151: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB27_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection152: +; CHECK-LDAPR-O0-NEXT: b .LBB27_5 +; CHECK-LDAPR-O0-NEXT: .LBB27_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw add i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_xchg_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: .LBB28_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection112: +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection113: +; CHECK-NOLSE-O1-NEXT: stxrb w9, w1, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection114: +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB28_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection153: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection154: +; CHECK-NOLSE-O0-NEXT: b .LBB28_1 +; CHECK-NOLSE-O0-NEXT: .LBB28_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB28_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .LBB28_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB28_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection155: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection156: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection157: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB28_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB28_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection158: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection159: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB28_2 +; CHECK-NOLSE-O0-NEXT: .LBB28_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB28_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection160: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection161: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection162: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection163: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB28_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection164: +; CHECK-NOLSE-O0-NEXT: b .LBB28_5 +; CHECK-NOLSE-O0-NEXT: .LBB28_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_xchg_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-LDAPR-O1-NEXT: .LBB28_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection112: +; CHECK-LDAPR-O1-NEXT: ldxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection113: +; CHECK-LDAPR-O1-NEXT: stxrb w9, w1, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection114: +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB28_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_xchg_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection153: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection154: +; CHECK-LDAPR-O0-NEXT: b .LBB28_1 +; CHECK-LDAPR-O0-NEXT: .LBB28_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB28_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w12, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .LBB28_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB28_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection155: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection156: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection157: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB28_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB28_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection158: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection159: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB28_2 +; CHECK-LDAPR-O0-NEXT: .LBB28_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB28_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection160: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection161: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection162: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection163: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB28_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection164: +; CHECK-LDAPR-O0-NEXT: b .LBB28_5 +; CHECK-LDAPR-O0-NEXT: .LBB28_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw xchg i8* %ptr, i8 %rhs monotonic, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_sub_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB29_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection115: +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection116: +; CHECK-NOLSE-O1-NEXT: sub w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection117: +; CHECK-NOLSE-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection118: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB29_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection165: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection166: +; CHECK-NOLSE-O0-NEXT: b .LBB29_1 +; CHECK-NOLSE-O0-NEXT: .LBB29_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB29_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection167: +; CHECK-NOLSE-O0-NEXT: subs w12, w10, w8 +; CHECK-NOLSE-O0-NEXT: .LBB29_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB29_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection168: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection169: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection170: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB29_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB29_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection171: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection172: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB29_2 +; CHECK-NOLSE-O0-NEXT: .LBB29_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB29_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection173: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection174: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection175: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection176: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB29_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection177: +; CHECK-NOLSE-O0-NEXT: b .LBB29_5 +; CHECK-NOLSE-O0-NEXT: .LBB29_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_sub_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB29_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection115: +; CHECK-LDAPR-O1-NEXT: ldaxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection116: +; CHECK-LDAPR-O1-NEXT: sub w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection117: +; CHECK-LDAPR-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection118: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB29_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_sub_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection165: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection166: +; CHECK-LDAPR-O0-NEXT: b .LBB29_1 +; CHECK-LDAPR-O0-NEXT: .LBB29_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB29_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection167: +; CHECK-LDAPR-O0-NEXT: subs w12, w10, w8 +; CHECK-LDAPR-O0-NEXT: .LBB29_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB29_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection168: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection169: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection170: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB29_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB29_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection171: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection172: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB29_2 +; CHECK-LDAPR-O0-NEXT: .LBB29_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB29_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection173: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection174: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection175: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection176: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB29_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection177: +; CHECK-LDAPR-O0-NEXT: b .LBB29_5 +; CHECK-LDAPR-O0-NEXT: .LBB29_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw sub i8* %ptr, i8 %rhs acquire, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_and_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB30_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection119: +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection120: +; CHECK-NOLSE-O1-NEXT: and w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection121: +; CHECK-NOLSE-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection122: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB30_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection178: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection179: +; CHECK-NOLSE-O0-NEXT: b .LBB30_1 +; CHECK-NOLSE-O0-NEXT: .LBB30_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB30_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection180: +; CHECK-NOLSE-O0-NEXT: and w12, w10, w8 +; CHECK-NOLSE-O0-NEXT: .LBB30_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB30_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection181: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection182: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection183: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB30_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB30_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection184: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection185: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB30_2 +; CHECK-NOLSE-O0-NEXT: .LBB30_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB30_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection186: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection187: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection188: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection189: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB30_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection190: +; CHECK-NOLSE-O0-NEXT: b .LBB30_5 +; CHECK-NOLSE-O0-NEXT: .LBB30_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_and_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB30_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection119: +; CHECK-LDAPR-O1-NEXT: ldxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection120: +; CHECK-LDAPR-O1-NEXT: and w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection121: +; CHECK-LDAPR-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection122: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB30_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_and_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection178: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection179: +; CHECK-LDAPR-O0-NEXT: b .LBB30_1 +; CHECK-LDAPR-O0-NEXT: .LBB30_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB30_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection180: +; CHECK-LDAPR-O0-NEXT: and w12, w10, w8 +; CHECK-LDAPR-O0-NEXT: .LBB30_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB30_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection181: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection182: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection183: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB30_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB30_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection184: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection185: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB30_2 +; CHECK-LDAPR-O0-NEXT: .LBB30_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB30_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection186: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection187: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection188: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection189: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB30_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection190: +; CHECK-LDAPR-O0-NEXT: b .LBB30_5 +; CHECK-LDAPR-O0-NEXT: .LBB30_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw and i8* %ptr, i8 %rhs release, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_or_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB31_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection123: +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection124: +; CHECK-NOLSE-O1-NEXT: orr w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection125: +; CHECK-NOLSE-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection126: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB31_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection191: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection192: +; CHECK-NOLSE-O0-NEXT: b .LBB31_1 +; CHECK-NOLSE-O0-NEXT: .LBB31_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB31_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection193: +; CHECK-NOLSE-O0-NEXT: orr w12, w10, w8 +; CHECK-NOLSE-O0-NEXT: .LBB31_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB31_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection194: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection195: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection196: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB31_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB31_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection197: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection198: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB31_2 +; CHECK-NOLSE-O0-NEXT: .LBB31_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB31_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection199: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection200: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection201: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection202: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB31_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection203: +; CHECK-NOLSE-O0-NEXT: b .LBB31_5 +; CHECK-NOLSE-O0-NEXT: .LBB31_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_or_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB31_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection123: +; CHECK-LDAPR-O1-NEXT: ldaxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection124: +; CHECK-LDAPR-O1-NEXT: orr w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection125: +; CHECK-LDAPR-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection126: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB31_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_or_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection191: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection192: +; CHECK-LDAPR-O0-NEXT: b .LBB31_1 +; CHECK-LDAPR-O0-NEXT: .LBB31_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB31_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection193: +; CHECK-LDAPR-O0-NEXT: orr w12, w10, w8 +; CHECK-LDAPR-O0-NEXT: .LBB31_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB31_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection194: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection195: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection196: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB31_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB31_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection197: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection198: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB31_2 +; CHECK-LDAPR-O0-NEXT: .LBB31_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB31_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection199: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection200: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection201: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection202: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB31_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection203: +; CHECK-LDAPR-O0-NEXT: b .LBB31_5 +; CHECK-LDAPR-O0-NEXT: .LBB31_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw or i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_xor_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB32_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection127: +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection128: +; CHECK-NOLSE-O1-NEXT: eor w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection129: +; CHECK-NOLSE-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection130: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB32_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection204: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection205: +; CHECK-NOLSE-O0-NEXT: b .LBB32_1 +; CHECK-NOLSE-O0-NEXT: .LBB32_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB32_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection206: +; CHECK-NOLSE-O0-NEXT: eor w12, w10, w8 +; CHECK-NOLSE-O0-NEXT: .LBB32_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB32_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection207: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection208: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection209: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB32_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB32_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection210: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection211: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB32_2 +; CHECK-NOLSE-O0-NEXT: .LBB32_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB32_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection212: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection213: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection214: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection215: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB32_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection216: +; CHECK-NOLSE-O0-NEXT: b .LBB32_5 +; CHECK-NOLSE-O0-NEXT: .LBB32_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_xor_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB32_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection127: +; CHECK-LDAPR-O1-NEXT: ldxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection128: +; CHECK-LDAPR-O1-NEXT: eor w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection129: +; CHECK-LDAPR-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection130: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB32_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_xor_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection204: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection205: +; CHECK-LDAPR-O0-NEXT: b .LBB32_1 +; CHECK-LDAPR-O0-NEXT: .LBB32_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB32_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection206: +; CHECK-LDAPR-O0-NEXT: eor w12, w10, w8 +; CHECK-LDAPR-O0-NEXT: .LBB32_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB32_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection207: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection208: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection209: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB32_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB32_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection210: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection211: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB32_2 +; CHECK-LDAPR-O0-NEXT: .LBB32_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB32_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection212: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection213: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection214: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection215: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB32_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection216: +; CHECK-LDAPR-O0-NEXT: b .LBB32_5 +; CHECK-LDAPR-O0-NEXT: .LBB32_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw xor i8* %ptr, i8 %rhs monotonic, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_min_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB33_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection131: +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection132: +; CHECK-NOLSE-O1-NEXT: sxtb w9, w8 +; CHECK-NOLSE-O1-NEXT: .Lpcsection133: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxtb +; CHECK-NOLSE-O1-NEXT: .Lpcsection134: +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: .Lpcsection135: +; CHECK-NOLSE-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection136: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB33_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection217: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection218: +; CHECK-NOLSE-O0-NEXT: b .LBB33_1 +; CHECK-NOLSE-O0-NEXT: .LBB33_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB33_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection219: +; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 +; CHECK-NOLSE-O0-NEXT: .Lpcsection220: +; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection221: +; CHECK-NOLSE-O0-NEXT: cset w9, le +; CHECK-NOLSE-O0-NEXT: .Lpcsection222: +; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection223: +; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection224: +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: .LBB33_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB33_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection225: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection226: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection227: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB33_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB33_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection228: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection229: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB33_2 +; CHECK-NOLSE-O0-NEXT: .LBB33_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB33_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection230: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection231: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection232: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection233: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB33_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection234: +; CHECK-NOLSE-O0-NEXT: b .LBB33_5 +; CHECK-NOLSE-O0-NEXT: .LBB33_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_min_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB33_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection131: +; CHECK-LDAPR-O1-NEXT: ldaxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection132: +; CHECK-LDAPR-O1-NEXT: sxtb w9, w8 +; CHECK-LDAPR-O1-NEXT: .Lpcsection133: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, sxtb +; CHECK-LDAPR-O1-NEXT: .Lpcsection134: +; CHECK-LDAPR-O1-NEXT: csel w9, w8, w1, le +; CHECK-LDAPR-O1-NEXT: .Lpcsection135: +; CHECK-LDAPR-O1-NEXT: stxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection136: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB33_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_min_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection217: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection218: +; CHECK-LDAPR-O0-NEXT: b .LBB33_1 +; CHECK-LDAPR-O0-NEXT: .LBB33_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB33_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection219: +; CHECK-LDAPR-O0-NEXT: sxtb w9, w10 +; CHECK-LDAPR-O0-NEXT: .Lpcsection220: +; CHECK-LDAPR-O0-NEXT: subs w9, w9, w8, sxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection221: +; CHECK-LDAPR-O0-NEXT: cset w9, le +; CHECK-LDAPR-O0-NEXT: .Lpcsection222: +; CHECK-LDAPR-O0-NEXT: and w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection223: +; CHECK-LDAPR-O0-NEXT: ands w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection224: +; CHECK-LDAPR-O0-NEXT: csel w12, w10, w8, ne +; CHECK-LDAPR-O0-NEXT: .LBB33_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB33_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection225: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection226: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection227: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB33_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB33_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection228: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection229: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB33_2 +; CHECK-LDAPR-O0-NEXT: .LBB33_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB33_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection230: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection231: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection232: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection233: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB33_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection234: +; CHECK-LDAPR-O0-NEXT: b .LBB33_5 +; CHECK-LDAPR-O0-NEXT: .LBB33_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw min i8* %ptr, i8 %rhs acquire, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_max_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB34_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection137: +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection138: +; CHECK-NOLSE-O1-NEXT: sxtb w9, w8 +; CHECK-NOLSE-O1-NEXT: .Lpcsection139: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxtb +; CHECK-NOLSE-O1-NEXT: .Lpcsection140: +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, gt +; CHECK-NOLSE-O1-NEXT: .Lpcsection141: +; CHECK-NOLSE-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection142: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB34_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection235: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection236: +; CHECK-NOLSE-O0-NEXT: b .LBB34_1 +; CHECK-NOLSE-O0-NEXT: .LBB34_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB34_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection237: +; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 +; CHECK-NOLSE-O0-NEXT: .Lpcsection238: +; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection239: +; CHECK-NOLSE-O0-NEXT: cset w9, gt +; CHECK-NOLSE-O0-NEXT: .Lpcsection240: +; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection241: +; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection242: +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: .LBB34_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB34_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection243: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection244: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection245: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB34_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB34_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection246: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection247: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB34_2 +; CHECK-NOLSE-O0-NEXT: .LBB34_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB34_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection248: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection249: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection250: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection251: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB34_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection252: +; CHECK-NOLSE-O0-NEXT: b .LBB34_5 +; CHECK-NOLSE-O0-NEXT: .LBB34_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_max_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB34_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection137: +; CHECK-LDAPR-O1-NEXT: ldxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection138: +; CHECK-LDAPR-O1-NEXT: sxtb w9, w8 +; CHECK-LDAPR-O1-NEXT: .Lpcsection139: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, sxtb +; CHECK-LDAPR-O1-NEXT: .Lpcsection140: +; CHECK-LDAPR-O1-NEXT: csel w9, w8, w1, gt +; CHECK-LDAPR-O1-NEXT: .Lpcsection141: +; CHECK-LDAPR-O1-NEXT: stlxrb w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection142: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB34_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_max_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection235: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection236: +; CHECK-LDAPR-O0-NEXT: b .LBB34_1 +; CHECK-LDAPR-O0-NEXT: .LBB34_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB34_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection237: +; CHECK-LDAPR-O0-NEXT: sxtb w9, w10 +; CHECK-LDAPR-O0-NEXT: .Lpcsection238: +; CHECK-LDAPR-O0-NEXT: subs w9, w9, w8, sxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection239: +; CHECK-LDAPR-O0-NEXT: cset w9, gt +; CHECK-LDAPR-O0-NEXT: .Lpcsection240: +; CHECK-LDAPR-O0-NEXT: and w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection241: +; CHECK-LDAPR-O0-NEXT: ands w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection242: +; CHECK-LDAPR-O0-NEXT: csel w12, w10, w8, ne +; CHECK-LDAPR-O0-NEXT: .LBB34_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB34_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection243: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection244: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection245: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB34_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB34_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection246: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection247: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB34_2 +; CHECK-LDAPR-O0-NEXT: .LBB34_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB34_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection248: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection249: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection250: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection251: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB34_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection252: +; CHECK-LDAPR-O0-NEXT: b .LBB34_5 +; CHECK-LDAPR-O0-NEXT: .LBB34_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw max i8* %ptr, i8 %rhs release, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_umin_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff +; CHECK-NOLSE-O1-NEXT: .LBB35_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection143: +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xff +; CHECK-NOLSE-O1-NEXT: .Lpcsection144: +; CHECK-NOLSE-O1-NEXT: cmp w10, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection145: +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: .Lpcsection146: +; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection147: +; CHECK-NOLSE-O1-NEXT: cbnz w11, .LBB35_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection253: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection254: +; CHECK-NOLSE-O0-NEXT: b .LBB35_1 +; CHECK-NOLSE-O0-NEXT: .LBB35_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB35_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection255: +; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection256: +; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection257: +; CHECK-NOLSE-O0-NEXT: cset w9, ls +; CHECK-NOLSE-O0-NEXT: .Lpcsection258: +; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection259: +; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection260: +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: .LBB35_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB35_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection261: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection262: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection263: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB35_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB35_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection264: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection265: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB35_2 +; CHECK-NOLSE-O0-NEXT: .LBB35_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB35_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection266: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection267: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection268: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection269: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB35_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection270: +; CHECK-NOLSE-O0-NEXT: b .LBB35_5 +; CHECK-NOLSE-O0-NEXT: .LBB35_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_umin_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: and w9, w1, #0xff +; CHECK-LDAPR-O1-NEXT: .LBB35_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection143: +; CHECK-LDAPR-O1-NEXT: ldaxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: and w10, w8, #0xff +; CHECK-LDAPR-O1-NEXT: .Lpcsection144: +; CHECK-LDAPR-O1-NEXT: cmp w10, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection145: +; CHECK-LDAPR-O1-NEXT: csel w10, w10, w9, ls +; CHECK-LDAPR-O1-NEXT: .Lpcsection146: +; CHECK-LDAPR-O1-NEXT: stlxrb w11, w10, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection147: +; CHECK-LDAPR-O1-NEXT: cbnz w11, .LBB35_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_umin_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection253: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection254: +; CHECK-LDAPR-O0-NEXT: b .LBB35_1 +; CHECK-LDAPR-O0-NEXT: .LBB35_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB35_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection255: +; CHECK-LDAPR-O0-NEXT: and w9, w10, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection256: +; CHECK-LDAPR-O0-NEXT: subs w9, w9, w8, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection257: +; CHECK-LDAPR-O0-NEXT: cset w9, ls +; CHECK-LDAPR-O0-NEXT: .Lpcsection258: +; CHECK-LDAPR-O0-NEXT: and w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection259: +; CHECK-LDAPR-O0-NEXT: ands w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection260: +; CHECK-LDAPR-O0-NEXT: csel w12, w10, w8, ne +; CHECK-LDAPR-O0-NEXT: .LBB35_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB35_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection261: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection262: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection263: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB35_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB35_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection264: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection265: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB35_2 +; CHECK-LDAPR-O0-NEXT: .LBB35_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB35_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection266: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection267: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection268: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection269: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB35_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection270: +; CHECK-LDAPR-O0-NEXT: b .LBB35_5 +; CHECK-LDAPR-O0-NEXT: .LBB35_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw umin i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + ret i8 %res +} + +define i8 @atomicrmw_umax_i8(i8* %ptr, i8 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff +; CHECK-NOLSE-O1-NEXT: .LBB36_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection148: +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xff +; CHECK-NOLSE-O1-NEXT: .Lpcsection149: +; CHECK-NOLSE-O1-NEXT: cmp w10, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection150: +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, hi +; CHECK-NOLSE-O1-NEXT: .Lpcsection151: +; CHECK-NOLSE-O1-NEXT: stxrb w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection152: +; CHECK-NOLSE-O1-NEXT: cbnz w11, .LBB36_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection271: +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection272: +; CHECK-NOLSE-O0-NEXT: b .LBB36_1 +; CHECK-NOLSE-O0-NEXT: .LBB36_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB36_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection273: +; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection274: +; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection275: +; CHECK-NOLSE-O0-NEXT: cset w9, hi +; CHECK-NOLSE-O0-NEXT: .Lpcsection276: +; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection277: +; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection278: +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: .LBB36_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB36_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection279: +; CHECK-NOLSE-O0-NEXT: ldaxrb w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection280: +; CHECK-NOLSE-O0-NEXT: cmp w9, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection281: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB36_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB36_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection282: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection283: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB36_2 +; CHECK-NOLSE-O0-NEXT: .LBB36_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB36_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection284: +; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection285: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection286: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection287: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB36_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection288: +; CHECK-NOLSE-O0-NEXT: b .LBB36_5 +; CHECK-NOLSE-O0-NEXT: .LBB36_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_umax_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: and w9, w1, #0xff +; CHECK-LDAPR-O1-NEXT: .LBB36_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection148: +; CHECK-LDAPR-O1-NEXT: ldxrb w8, [x0] +; CHECK-LDAPR-O1-NEXT: and w10, w8, #0xff +; CHECK-LDAPR-O1-NEXT: .Lpcsection149: +; CHECK-LDAPR-O1-NEXT: cmp w10, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection150: +; CHECK-LDAPR-O1-NEXT: csel w10, w10, w9, hi +; CHECK-LDAPR-O1-NEXT: .Lpcsection151: +; CHECK-LDAPR-O1-NEXT: stxrb w11, w10, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection152: +; CHECK-LDAPR-O1-NEXT: cbnz w11, .LBB36_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_umax_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection271: +; CHECK-LDAPR-O0-NEXT: ldrb w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection272: +; CHECK-LDAPR-O0-NEXT: b .LBB36_1 +; CHECK-LDAPR-O0-NEXT: .LBB36_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB36_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w10, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection273: +; CHECK-LDAPR-O0-NEXT: and w9, w10, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection274: +; CHECK-LDAPR-O0-NEXT: subs w9, w9, w8, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection275: +; CHECK-LDAPR-O0-NEXT: cset w9, hi +; CHECK-LDAPR-O0-NEXT: .Lpcsection276: +; CHECK-LDAPR-O0-NEXT: and w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection277: +; CHECK-LDAPR-O0-NEXT: ands w9, w9, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection278: +; CHECK-LDAPR-O0-NEXT: csel w12, w10, w8, ne +; CHECK-LDAPR-O0-NEXT: .LBB36_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB36_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection279: +; CHECK-LDAPR-O0-NEXT: ldaxrb w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection280: +; CHECK-LDAPR-O0-NEXT: cmp w9, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection281: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB36_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB36_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection282: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection283: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB36_2 +; CHECK-LDAPR-O0-NEXT: .LBB36_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB36_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection284: +; CHECK-LDAPR-O0-NEXT: and w8, w9, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection285: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w10, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection286: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection287: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB36_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection288: +; CHECK-LDAPR-O0-NEXT: b .LBB36_5 +; CHECK-LDAPR-O0-NEXT: .LBB36_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw umax i8* %ptr, i8 %rhs monotonic, !pcsections !0 + ret i8 %res +} + +define i16 @atomicrmw_add_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB37_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection153: +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection154: +; CHECK-NOLSE-O1-NEXT: add w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection155: +; CHECK-NOLSE-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection156: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB37_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection289: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection290: +; CHECK-NOLSE-O0-NEXT: b .LBB37_1 +; CHECK-NOLSE-O0-NEXT: .LBB37_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB37_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection291: +; CHECK-NOLSE-O0-NEXT: add w12, w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .LBB37_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB37_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection292: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection293: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection294: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB37_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB37_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection295: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection296: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB37_2 +; CHECK-NOLSE-O0-NEXT: .LBB37_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB37_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection297: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection298: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection299: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection300: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB37_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection301: +; CHECK-NOLSE-O0-NEXT: b .LBB37_5 +; CHECK-NOLSE-O0-NEXT: .LBB37_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_add_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB37_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection153: +; CHECK-LDAPR-O1-NEXT: ldaxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection154: +; CHECK-LDAPR-O1-NEXT: add w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection155: +; CHECK-LDAPR-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection156: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB37_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_add_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection289: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection290: +; CHECK-LDAPR-O0-NEXT: b .LBB37_1 +; CHECK-LDAPR-O0-NEXT: .LBB37_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB37_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection291: +; CHECK-LDAPR-O0-NEXT: add w12, w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .LBB37_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB37_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection292: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection293: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection294: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB37_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB37_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection295: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection296: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB37_2 +; CHECK-LDAPR-O0-NEXT: .LBB37_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB37_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection297: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection298: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection299: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection300: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB37_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection301: +; CHECK-LDAPR-O0-NEXT: b .LBB37_5 +; CHECK-LDAPR-O0-NEXT: .LBB37_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw add i16* %ptr, i16 %rhs seq_cst, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_xchg_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: .LBB38_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection157: +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection158: +; CHECK-NOLSE-O1-NEXT: stxrh w9, w1, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection159: +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB38_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection302: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection303: +; CHECK-NOLSE-O0-NEXT: b .LBB38_1 +; CHECK-NOLSE-O0-NEXT: .LBB38_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB38_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .LBB38_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB38_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection304: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection305: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection306: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB38_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB38_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection307: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection308: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB38_2 +; CHECK-NOLSE-O0-NEXT: .LBB38_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB38_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection309: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection310: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection311: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection312: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB38_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection313: +; CHECK-NOLSE-O0-NEXT: b .LBB38_5 +; CHECK-NOLSE-O0-NEXT: .LBB38_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_xchg_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-LDAPR-O1-NEXT: .LBB38_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection157: +; CHECK-LDAPR-O1-NEXT: ldxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection158: +; CHECK-LDAPR-O1-NEXT: stxrh w9, w1, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection159: +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB38_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_xchg_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection302: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection303: +; CHECK-LDAPR-O0-NEXT: b .LBB38_1 +; CHECK-LDAPR-O0-NEXT: .LBB38_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB38_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w12, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .LBB38_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB38_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection304: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection305: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection306: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB38_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB38_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection307: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection308: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB38_2 +; CHECK-LDAPR-O0-NEXT: .LBB38_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB38_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection309: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection310: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection311: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection312: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB38_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection313: +; CHECK-LDAPR-O0-NEXT: b .LBB38_5 +; CHECK-LDAPR-O0-NEXT: .LBB38_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw xchg i16* %ptr, i16 %rhs monotonic, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_sub_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB39_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection160: +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection161: +; CHECK-NOLSE-O1-NEXT: sub w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection162: +; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection163: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB39_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection314: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection315: +; CHECK-NOLSE-O0-NEXT: b .LBB39_1 +; CHECK-NOLSE-O0-NEXT: .LBB39_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB39_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection316: +; CHECK-NOLSE-O0-NEXT: subs w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: .LBB39_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB39_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection317: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection318: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection319: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB39_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB39_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection320: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection321: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB39_2 +; CHECK-NOLSE-O0-NEXT: .LBB39_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB39_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection322: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection323: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection324: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection325: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB39_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection326: +; CHECK-NOLSE-O0-NEXT: b .LBB39_5 +; CHECK-NOLSE-O0-NEXT: .LBB39_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_sub_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB39_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection160: +; CHECK-LDAPR-O1-NEXT: ldaxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection161: +; CHECK-LDAPR-O1-NEXT: sub w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection162: +; CHECK-LDAPR-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection163: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB39_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_sub_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection314: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection315: +; CHECK-LDAPR-O0-NEXT: b .LBB39_1 +; CHECK-LDAPR-O0-NEXT: .LBB39_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB39_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection316: +; CHECK-LDAPR-O0-NEXT: subs w12, w8, w9 +; CHECK-LDAPR-O0-NEXT: .LBB39_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB39_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection317: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection318: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection319: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB39_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB39_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection320: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection321: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB39_2 +; CHECK-LDAPR-O0-NEXT: .LBB39_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB39_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection322: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection323: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection324: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection325: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB39_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection326: +; CHECK-LDAPR-O0-NEXT: b .LBB39_5 +; CHECK-LDAPR-O0-NEXT: .LBB39_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw sub i16* %ptr, i16 %rhs acquire, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_and_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB40_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection164: +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection165: +; CHECK-NOLSE-O1-NEXT: and w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection166: +; CHECK-NOLSE-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection167: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB40_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection327: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection328: +; CHECK-NOLSE-O0-NEXT: b .LBB40_1 +; CHECK-NOLSE-O0-NEXT: .LBB40_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB40_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection329: +; CHECK-NOLSE-O0-NEXT: and w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: .LBB40_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB40_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection330: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection331: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection332: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB40_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB40_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection333: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection334: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB40_2 +; CHECK-NOLSE-O0-NEXT: .LBB40_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB40_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection335: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection336: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection337: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection338: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB40_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection339: +; CHECK-NOLSE-O0-NEXT: b .LBB40_5 +; CHECK-NOLSE-O0-NEXT: .LBB40_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_and_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB40_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection164: +; CHECK-LDAPR-O1-NEXT: ldxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection165: +; CHECK-LDAPR-O1-NEXT: and w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection166: +; CHECK-LDAPR-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection167: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB40_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_and_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection327: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection328: +; CHECK-LDAPR-O0-NEXT: b .LBB40_1 +; CHECK-LDAPR-O0-NEXT: .LBB40_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB40_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection329: +; CHECK-LDAPR-O0-NEXT: and w12, w8, w9 +; CHECK-LDAPR-O0-NEXT: .LBB40_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB40_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection330: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection331: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection332: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB40_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB40_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection333: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection334: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB40_2 +; CHECK-LDAPR-O0-NEXT: .LBB40_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB40_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection335: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection336: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection337: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection338: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB40_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection339: +; CHECK-LDAPR-O0-NEXT: b .LBB40_5 +; CHECK-LDAPR-O0-NEXT: .LBB40_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw and i16* %ptr, i16 %rhs release, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_or_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB41_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection168: +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection169: +; CHECK-NOLSE-O1-NEXT: orr w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection170: +; CHECK-NOLSE-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection171: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB41_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection340: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection341: +; CHECK-NOLSE-O0-NEXT: b .LBB41_1 +; CHECK-NOLSE-O0-NEXT: .LBB41_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB41_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection342: +; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: .LBB41_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB41_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection343: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection344: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection345: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB41_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB41_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection346: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection347: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB41_2 +; CHECK-NOLSE-O0-NEXT: .LBB41_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB41_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection348: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection349: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection350: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection351: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB41_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection352: +; CHECK-NOLSE-O0-NEXT: b .LBB41_5 +; CHECK-NOLSE-O0-NEXT: .LBB41_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_or_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB41_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection168: +; CHECK-LDAPR-O1-NEXT: ldaxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection169: +; CHECK-LDAPR-O1-NEXT: orr w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection170: +; CHECK-LDAPR-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection171: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB41_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_or_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection340: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection341: +; CHECK-LDAPR-O0-NEXT: b .LBB41_1 +; CHECK-LDAPR-O0-NEXT: .LBB41_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB41_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection342: +; CHECK-LDAPR-O0-NEXT: orr w12, w8, w9 +; CHECK-LDAPR-O0-NEXT: .LBB41_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB41_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection343: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection344: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection345: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB41_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB41_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection346: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection347: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB41_2 +; CHECK-LDAPR-O0-NEXT: .LBB41_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB41_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection348: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection349: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection350: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection351: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB41_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection352: +; CHECK-LDAPR-O0-NEXT: b .LBB41_5 +; CHECK-LDAPR-O0-NEXT: .LBB41_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw or i16* %ptr, i16 %rhs seq_cst, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_xor_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB42_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection172: +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection173: +; CHECK-NOLSE-O1-NEXT: eor w9, w8, w1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection174: +; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection175: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB42_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection353: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection354: +; CHECK-NOLSE-O0-NEXT: b .LBB42_1 +; CHECK-NOLSE-O0-NEXT: .LBB42_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB42_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection355: +; CHECK-NOLSE-O0-NEXT: eor w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: .LBB42_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB42_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection356: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection357: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection358: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB42_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB42_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection359: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection360: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB42_2 +; CHECK-NOLSE-O0-NEXT: .LBB42_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB42_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection361: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection362: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection363: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection364: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB42_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection365: +; CHECK-NOLSE-O0-NEXT: b .LBB42_5 +; CHECK-NOLSE-O0-NEXT: .LBB42_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_xor_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB42_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection172: +; CHECK-LDAPR-O1-NEXT: ldxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection173: +; CHECK-LDAPR-O1-NEXT: eor w9, w8, w1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection174: +; CHECK-LDAPR-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection175: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB42_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_xor_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection353: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection354: +; CHECK-LDAPR-O0-NEXT: b .LBB42_1 +; CHECK-LDAPR-O0-NEXT: .LBB42_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB42_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection355: +; CHECK-LDAPR-O0-NEXT: eor w12, w8, w9 +; CHECK-LDAPR-O0-NEXT: .LBB42_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB42_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection356: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection357: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection358: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB42_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB42_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection359: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection360: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB42_2 +; CHECK-LDAPR-O0-NEXT: .LBB42_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB42_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection361: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection362: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection363: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection364: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB42_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection365: +; CHECK-LDAPR-O0-NEXT: b .LBB42_5 +; CHECK-LDAPR-O0-NEXT: .LBB42_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw xor i16* %ptr, i16 %rhs monotonic, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_min_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB43_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection176: +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection177: +; CHECK-NOLSE-O1-NEXT: sxth w9, w8 +; CHECK-NOLSE-O1-NEXT: .Lpcsection178: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxth +; CHECK-NOLSE-O1-NEXT: .Lpcsection179: +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: .Lpcsection180: +; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection181: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB43_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection366: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection367: +; CHECK-NOLSE-O0-NEXT: b .LBB43_1 +; CHECK-NOLSE-O0-NEXT: .LBB43_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB43_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection368: +; CHECK-NOLSE-O0-NEXT: sxth w10, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection369: +; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection370: +; CHECK-NOLSE-O0-NEXT: cset w10, le +; CHECK-NOLSE-O0-NEXT: .Lpcsection371: +; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection372: +; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection373: +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: .LBB43_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB43_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection374: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection375: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection376: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB43_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB43_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection377: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection378: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB43_2 +; CHECK-NOLSE-O0-NEXT: .LBB43_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB43_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection379: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection380: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection381: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection382: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB43_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection383: +; CHECK-NOLSE-O0-NEXT: b .LBB43_5 +; CHECK-NOLSE-O0-NEXT: .LBB43_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_min_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB43_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection176: +; CHECK-LDAPR-O1-NEXT: ldaxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection177: +; CHECK-LDAPR-O1-NEXT: sxth w9, w8 +; CHECK-LDAPR-O1-NEXT: .Lpcsection178: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, sxth +; CHECK-LDAPR-O1-NEXT: .Lpcsection179: +; CHECK-LDAPR-O1-NEXT: csel w9, w8, w1, le +; CHECK-LDAPR-O1-NEXT: .Lpcsection180: +; CHECK-LDAPR-O1-NEXT: stxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection181: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB43_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_min_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection366: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection367: +; CHECK-LDAPR-O0-NEXT: b .LBB43_1 +; CHECK-LDAPR-O0-NEXT: .LBB43_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB43_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection368: +; CHECK-LDAPR-O0-NEXT: sxth w10, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection369: +; CHECK-LDAPR-O0-NEXT: subs w10, w10, w9, sxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection370: +; CHECK-LDAPR-O0-NEXT: cset w10, le +; CHECK-LDAPR-O0-NEXT: .Lpcsection371: +; CHECK-LDAPR-O0-NEXT: and w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection372: +; CHECK-LDAPR-O0-NEXT: ands w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection373: +; CHECK-LDAPR-O0-NEXT: csel w12, w8, w9, ne +; CHECK-LDAPR-O0-NEXT: .LBB43_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB43_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection374: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection375: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection376: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB43_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB43_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection377: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection378: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB43_2 +; CHECK-LDAPR-O0-NEXT: .LBB43_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB43_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection379: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection380: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection381: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection382: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB43_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection383: +; CHECK-LDAPR-O0-NEXT: b .LBB43_5 +; CHECK-LDAPR-O0-NEXT: .LBB43_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw min i16* %ptr, i16 %rhs acquire, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_max_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .LBB44_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection182: +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection183: +; CHECK-NOLSE-O1-NEXT: sxth w9, w8 +; CHECK-NOLSE-O1-NEXT: .Lpcsection184: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxth +; CHECK-NOLSE-O1-NEXT: .Lpcsection185: +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, gt +; CHECK-NOLSE-O1-NEXT: .Lpcsection186: +; CHECK-NOLSE-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection187: +; CHECK-NOLSE-O1-NEXT: cbnz w10, .LBB44_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection384: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection385: +; CHECK-NOLSE-O0-NEXT: b .LBB44_1 +; CHECK-NOLSE-O0-NEXT: .LBB44_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB44_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection386: +; CHECK-NOLSE-O0-NEXT: sxth w10, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection387: +; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection388: +; CHECK-NOLSE-O0-NEXT: cset w10, gt +; CHECK-NOLSE-O0-NEXT: .Lpcsection389: +; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection390: +; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection391: +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: .LBB44_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB44_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection392: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection393: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection394: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB44_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB44_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection395: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection396: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB44_2 +; CHECK-NOLSE-O0-NEXT: .LBB44_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB44_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection397: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection398: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection399: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection400: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB44_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection401: +; CHECK-NOLSE-O0-NEXT: b .LBB44_5 +; CHECK-NOLSE-O0-NEXT: .LBB44_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_max_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .LBB44_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection182: +; CHECK-LDAPR-O1-NEXT: ldxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection183: +; CHECK-LDAPR-O1-NEXT: sxth w9, w8 +; CHECK-LDAPR-O1-NEXT: .Lpcsection184: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, sxth +; CHECK-LDAPR-O1-NEXT: .Lpcsection185: +; CHECK-LDAPR-O1-NEXT: csel w9, w8, w1, gt +; CHECK-LDAPR-O1-NEXT: .Lpcsection186: +; CHECK-LDAPR-O1-NEXT: stlxrh w10, w9, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection187: +; CHECK-LDAPR-O1-NEXT: cbnz w10, .LBB44_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_max_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection384: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection385: +; CHECK-LDAPR-O0-NEXT: b .LBB44_1 +; CHECK-LDAPR-O0-NEXT: .LBB44_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB44_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection386: +; CHECK-LDAPR-O0-NEXT: sxth w10, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection387: +; CHECK-LDAPR-O0-NEXT: subs w10, w10, w9, sxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection388: +; CHECK-LDAPR-O0-NEXT: cset w10, gt +; CHECK-LDAPR-O0-NEXT: .Lpcsection389: +; CHECK-LDAPR-O0-NEXT: and w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection390: +; CHECK-LDAPR-O0-NEXT: ands w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection391: +; CHECK-LDAPR-O0-NEXT: csel w12, w8, w9, ne +; CHECK-LDAPR-O0-NEXT: .LBB44_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB44_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection392: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection393: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection394: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB44_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB44_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection395: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection396: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB44_2 +; CHECK-LDAPR-O0-NEXT: .LBB44_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB44_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection397: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection398: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection399: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection400: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB44_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection401: +; CHECK-LDAPR-O0-NEXT: b .LBB44_5 +; CHECK-LDAPR-O0-NEXT: .LBB44_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw max i16* %ptr, i16 %rhs release, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_umin_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff +; CHECK-NOLSE-O1-NEXT: .LBB45_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection188: +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xffff +; CHECK-NOLSE-O1-NEXT: .Lpcsection189: +; CHECK-NOLSE-O1-NEXT: cmp w10, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection190: +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: .Lpcsection191: +; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection192: +; CHECK-NOLSE-O1-NEXT: cbnz w11, .LBB45_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection402: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection403: +; CHECK-NOLSE-O0-NEXT: b .LBB45_1 +; CHECK-NOLSE-O0-NEXT: .LBB45_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB45_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection404: +; CHECK-NOLSE-O0-NEXT: uxth w10, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection405: +; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection406: +; CHECK-NOLSE-O0-NEXT: cset w10, ls +; CHECK-NOLSE-O0-NEXT: .Lpcsection407: +; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection408: +; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection409: +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: .LBB45_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB45_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection410: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection411: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection412: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB45_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB45_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection413: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection414: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB45_2 +; CHECK-NOLSE-O0-NEXT: .LBB45_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB45_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection415: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection416: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection417: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection418: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB45_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection419: +; CHECK-NOLSE-O0-NEXT: b .LBB45_5 +; CHECK-NOLSE-O0-NEXT: .LBB45_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_umin_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: and w9, w1, #0xffff +; CHECK-LDAPR-O1-NEXT: .LBB45_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection188: +; CHECK-LDAPR-O1-NEXT: ldaxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: and w10, w8, #0xffff +; CHECK-LDAPR-O1-NEXT: .Lpcsection189: +; CHECK-LDAPR-O1-NEXT: cmp w10, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection190: +; CHECK-LDAPR-O1-NEXT: csel w10, w10, w9, ls +; CHECK-LDAPR-O1-NEXT: .Lpcsection191: +; CHECK-LDAPR-O1-NEXT: stlxrh w11, w10, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection192: +; CHECK-LDAPR-O1-NEXT: cbnz w11, .LBB45_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_umin_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection402: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection403: +; CHECK-LDAPR-O0-NEXT: b .LBB45_1 +; CHECK-LDAPR-O0-NEXT: .LBB45_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB45_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection404: +; CHECK-LDAPR-O0-NEXT: uxth w10, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection405: +; CHECK-LDAPR-O0-NEXT: subs w10, w10, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection406: +; CHECK-LDAPR-O0-NEXT: cset w10, ls +; CHECK-LDAPR-O0-NEXT: .Lpcsection407: +; CHECK-LDAPR-O0-NEXT: and w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection408: +; CHECK-LDAPR-O0-NEXT: ands w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection409: +; CHECK-LDAPR-O0-NEXT: csel w12, w8, w9, ne +; CHECK-LDAPR-O0-NEXT: .LBB45_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB45_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection410: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection411: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection412: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB45_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB45_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection413: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection414: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB45_2 +; CHECK-LDAPR-O0-NEXT: .LBB45_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB45_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection415: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection416: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection417: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection418: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB45_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection419: +; CHECK-LDAPR-O0-NEXT: b .LBB45_5 +; CHECK-LDAPR-O0-NEXT: .LBB45_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw umin i16* %ptr, i16 %rhs seq_cst, !pcsections !0 + ret i16 %res +} + +define i16 @atomicrmw_umax_i16(i16* %ptr, i16 %rhs) { +; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff +; CHECK-NOLSE-O1-NEXT: .LBB46_1: // %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection193: +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xffff +; CHECK-NOLSE-O1-NEXT: .Lpcsection194: +; CHECK-NOLSE-O1-NEXT: cmp w10, w9 +; CHECK-NOLSE-O1-NEXT: .Lpcsection195: +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, hi +; CHECK-NOLSE-O1-NEXT: .Lpcsection196: +; CHECK-NOLSE-O1-NEXT: stxrh w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: .Lpcsection197: +; CHECK-NOLSE-O1-NEXT: cbnz w11, .LBB46_1 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection420: +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection421: +; CHECK-NOLSE-O0-NEXT: b .LBB46_1 +; CHECK-NOLSE-O0-NEXT: .LBB46_1: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: // Child Loop BB46_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: .Lpcsection422: +; CHECK-NOLSE-O0-NEXT: uxth w10, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection423: +; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection424: +; CHECK-NOLSE-O0-NEXT: cset w10, hi +; CHECK-NOLSE-O0-NEXT: .Lpcsection425: +; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection426: +; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection427: +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: .LBB46_2: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // Parent Loop BB46_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection428: +; CHECK-NOLSE-O0-NEXT: ldaxrh w9, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection429: +; CHECK-NOLSE-O0-NEXT: cmp w9, w8, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection430: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB46_4 +; CHECK-NOLSE-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB46_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: .Lpcsection431: +; CHECK-NOLSE-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: .Lpcsection432: +; CHECK-NOLSE-O0-NEXT: cbnz w10, .LBB46_2 +; CHECK-NOLSE-O0-NEXT: .LBB46_4: // %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: // in Loop: Header=BB46_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection433: +; CHECK-NOLSE-O0-NEXT: uxth w8, w8 +; CHECK-NOLSE-O0-NEXT: .Lpcsection434: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection435: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: .Lpcsection436: +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, .LBB46_1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection437: +; CHECK-NOLSE-O0-NEXT: b .LBB46_5 +; CHECK-NOLSE-O0-NEXT: .LBB46_5: // %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: atomicrmw_umax_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: and w9, w1, #0xffff +; CHECK-LDAPR-O1-NEXT: .LBB46_1: // %atomicrmw.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection193: +; CHECK-LDAPR-O1-NEXT: ldxrh w8, [x0] +; CHECK-LDAPR-O1-NEXT: and w10, w8, #0xffff +; CHECK-LDAPR-O1-NEXT: .Lpcsection194: +; CHECK-LDAPR-O1-NEXT: cmp w10, w9 +; CHECK-LDAPR-O1-NEXT: .Lpcsection195: +; CHECK-LDAPR-O1-NEXT: csel w10, w10, w9, hi +; CHECK-LDAPR-O1-NEXT: .Lpcsection196: +; CHECK-LDAPR-O1-NEXT: stxrh w11, w10, [x0] +; CHECK-LDAPR-O1-NEXT: .Lpcsection197: +; CHECK-LDAPR-O1-NEXT: cbnz w11, .LBB46_1 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-LDAPR-O1-NEXT: mov w0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: atomicrmw_umax_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: sub sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-LDAPR-O0-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: str w1, [sp, #24] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection420: +; CHECK-LDAPR-O0-NEXT: ldrh w8, [x0] +; CHECK-LDAPR-O0-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection421: +; CHECK-LDAPR-O0-NEXT: b .LBB46_1 +; CHECK-LDAPR-O0-NEXT: .LBB46_1: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // =>This Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: // Child Loop BB46_2 Depth 2 +; CHECK-LDAPR-O0-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: ldr w9, [sp, #24] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: .Lpcsection422: +; CHECK-LDAPR-O0-NEXT: uxth w10, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection423: +; CHECK-LDAPR-O0-NEXT: subs w10, w10, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection424: +; CHECK-LDAPR-O0-NEXT: cset w10, hi +; CHECK-LDAPR-O0-NEXT: .Lpcsection425: +; CHECK-LDAPR-O0-NEXT: and w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection426: +; CHECK-LDAPR-O0-NEXT: ands w10, w10, #0x1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection427: +; CHECK-LDAPR-O0-NEXT: csel w12, w8, w9, ne +; CHECK-LDAPR-O0-NEXT: .LBB46_2: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // Parent Loop BB46_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection428: +; CHECK-LDAPR-O0-NEXT: ldaxrh w9, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection429: +; CHECK-LDAPR-O0-NEXT: cmp w9, w8, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection430: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB46_4 +; CHECK-LDAPR-O0-NEXT: // %bb.3: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB46_2 Depth=2 +; CHECK-LDAPR-O0-NEXT: .Lpcsection431: +; CHECK-LDAPR-O0-NEXT: stlxrh w10, w12, [x11] +; CHECK-LDAPR-O0-NEXT: .Lpcsection432: +; CHECK-LDAPR-O0-NEXT: cbnz w10, .LBB46_2 +; CHECK-LDAPR-O0-NEXT: .LBB46_4: // %atomicrmw.start +; CHECK-LDAPR-O0-NEXT: // in Loop: Header=BB46_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #12] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection433: +; CHECK-LDAPR-O0-NEXT: uxth w8, w8 +; CHECK-LDAPR-O0-NEXT: .Lpcsection434: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w9, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection435: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: str w9, [sp, #28] // 4-byte Folded Spill +; CHECK-LDAPR-O0-NEXT: .Lpcsection436: +; CHECK-LDAPR-O0-NEXT: tbz w8, #0, .LBB46_1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection437: +; CHECK-LDAPR-O0-NEXT: b .LBB46_5 +; CHECK-LDAPR-O0-NEXT: .LBB46_5: // %atomicrmw.end +; CHECK-LDAPR-O0-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; CHECK-LDAPR-O0-NEXT: add sp, sp, #32 +; CHECK-LDAPR-O0-NEXT: ret + %res = atomicrmw umax i16* %ptr, i16 %rhs monotonic, !pcsections !0 + ret i16 %res +} + +define { i8, i1 } @cmpxchg_i8(i8* %ptr, i8 %desired, i8 %new) { +; CHECK-NOLSE-O1-LABEL: cmpxchg_i8: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: mov x8, x0 +; CHECK-NOLSE-O1-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-NOLSE-O1-NEXT: .LBB47_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection198: +; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection199: +; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xff +; CHECK-NOLSE-O1-NEXT: .Lpcsection200: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxtb +; CHECK-NOLSE-O1-NEXT: .Lpcsection201: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB47_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB47_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection202: +; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB47_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: +; CHECK-NOLSE-O1-NEXT: mov w1, #1 +; CHECK-NOLSE-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB47_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: .Lpcsection203: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: cmpxchg_i8: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB47_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection438: +; CHECK-NOLSE-O0-NEXT: ldaxrb w0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection439: +; CHECK-NOLSE-O0-NEXT: cmp w0, w1, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection440: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB47_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB47_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection441: +; CHECK-NOLSE-O0-NEXT: stlxrb w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection442: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB47_1 +; CHECK-NOLSE-O0-NEXT: .LBB47_3: +; CHECK-NOLSE-O0-NEXT: .Lpcsection443: +; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xff +; CHECK-NOLSE-O0-NEXT: .Lpcsection444: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxtb +; CHECK-NOLSE-O0-NEXT: .Lpcsection445: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: cmpxchg_i8: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: mov x8, x0 +; CHECK-LDAPR-O1-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-LDAPR-O1-NEXT: .LBB47_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection198: +; CHECK-LDAPR-O1-NEXT: ldxrb w0, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection199: +; CHECK-LDAPR-O1-NEXT: and w9, w0, #0xff +; CHECK-LDAPR-O1-NEXT: .Lpcsection200: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, uxtb +; CHECK-LDAPR-O1-NEXT: .Lpcsection201: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB47_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB47_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection202: +; CHECK-LDAPR-O1-NEXT: stxrb w9, w2, [x8] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB47_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: +; CHECK-LDAPR-O1-NEXT: mov w1, #1 +; CHECK-LDAPR-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB47_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: mov w1, wzr +; CHECK-LDAPR-O1-NEXT: .Lpcsection203: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: cmpxchg_i8: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB47_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection438: +; CHECK-LDAPR-O0-NEXT: ldaxrb w0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection439: +; CHECK-LDAPR-O0-NEXT: cmp w0, w1, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection440: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB47_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB47_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection441: +; CHECK-LDAPR-O0-NEXT: stlxrb w8, w2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection442: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB47_1 +; CHECK-LDAPR-O0-NEXT: .LBB47_3: +; CHECK-LDAPR-O0-NEXT: .Lpcsection443: +; CHECK-LDAPR-O0-NEXT: and w8, w0, #0xff +; CHECK-LDAPR-O0-NEXT: .Lpcsection444: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w1, uxtb +; CHECK-LDAPR-O0-NEXT: .Lpcsection445: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: and w1, w8, #0x1 +; CHECK-LDAPR-O0-NEXT: ret + %res = cmpxchg i8* %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0 + ret { i8, i1 } %res +} + +define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) { +; CHECK-NOLSE-O1-LABEL: cmpxchg_i16: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: mov x8, x0 +; CHECK-NOLSE-O1-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-NOLSE-O1-NEXT: .LBB48_1: // %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection204: +; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8] +; CHECK-NOLSE-O1-NEXT: .Lpcsection205: +; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xffff +; CHECK-NOLSE-O1-NEXT: .Lpcsection206: +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxth +; CHECK-NOLSE-O1-NEXT: .Lpcsection207: +; CHECK-NOLSE-O1-NEXT: b.ne .LBB48_4 +; CHECK-NOLSE-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: // in Loop: Header=BB48_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: .Lpcsection208: +; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w9, .LBB48_1 +; CHECK-NOLSE-O1-NEXT: // %bb.3: +; CHECK-NOLSE-O1-NEXT: mov w1, #1 +; CHECK-NOLSE-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: .LBB48_4: // %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: .Lpcsection209: +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: cmpxchg_i16: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: .LBB48_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection446: +; CHECK-NOLSE-O0-NEXT: ldaxrh w0, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection447: +; CHECK-NOLSE-O0-NEXT: cmp w0, w1, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection448: +; CHECK-NOLSE-O0-NEXT: b.ne .LBB48_3 +; CHECK-NOLSE-O0-NEXT: // %bb.2: // in Loop: Header=BB48_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: .Lpcsection449: +; CHECK-NOLSE-O0-NEXT: stlxrh w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: .Lpcsection450: +; CHECK-NOLSE-O0-NEXT: cbnz w8, .LBB48_1 +; CHECK-NOLSE-O0-NEXT: .LBB48_3: +; CHECK-NOLSE-O0-NEXT: .Lpcsection451: +; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xffff +; CHECK-NOLSE-O0-NEXT: .Lpcsection452: +; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxth +; CHECK-NOLSE-O0-NEXT: .Lpcsection453: +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: cmpxchg_i16: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: mov x8, x0 +; CHECK-LDAPR-O1-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-LDAPR-O1-NEXT: .LBB48_1: // %cmpxchg.start +; CHECK-LDAPR-O1-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection204: +; CHECK-LDAPR-O1-NEXT: ldxrh w0, [x8] +; CHECK-LDAPR-O1-NEXT: .Lpcsection205: +; CHECK-LDAPR-O1-NEXT: and w9, w0, #0xffff +; CHECK-LDAPR-O1-NEXT: .Lpcsection206: +; CHECK-LDAPR-O1-NEXT: cmp w9, w1, uxth +; CHECK-LDAPR-O1-NEXT: .Lpcsection207: +; CHECK-LDAPR-O1-NEXT: b.ne .LBB48_4 +; CHECK-LDAPR-O1-NEXT: // %bb.2: // %cmpxchg.trystore +; CHECK-LDAPR-O1-NEXT: // in Loop: Header=BB48_1 Depth=1 +; CHECK-LDAPR-O1-NEXT: .Lpcsection208: +; CHECK-LDAPR-O1-NEXT: stxrh w9, w2, [x8] +; CHECK-LDAPR-O1-NEXT: cbnz w9, .LBB48_1 +; CHECK-LDAPR-O1-NEXT: // %bb.3: +; CHECK-LDAPR-O1-NEXT: mov w1, #1 +; CHECK-LDAPR-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-LDAPR-O1-NEXT: ret +; CHECK-LDAPR-O1-NEXT: .LBB48_4: // %cmpxchg.nostore +; CHECK-LDAPR-O1-NEXT: mov w1, wzr +; CHECK-LDAPR-O1-NEXT: .Lpcsection209: +; CHECK-LDAPR-O1-NEXT: clrex +; CHECK-LDAPR-O1-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: cmpxchg_i16: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: mov x9, x0 +; CHECK-LDAPR-O0-NEXT: .LBB48_1: // =>This Inner Loop Header: Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection446: +; CHECK-LDAPR-O0-NEXT: ldaxrh w0, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection447: +; CHECK-LDAPR-O0-NEXT: cmp w0, w1, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection448: +; CHECK-LDAPR-O0-NEXT: b.ne .LBB48_3 +; CHECK-LDAPR-O0-NEXT: // %bb.2: // in Loop: Header=BB48_1 Depth=1 +; CHECK-LDAPR-O0-NEXT: .Lpcsection449: +; CHECK-LDAPR-O0-NEXT: stlxrh w8, w2, [x9] +; CHECK-LDAPR-O0-NEXT: .Lpcsection450: +; CHECK-LDAPR-O0-NEXT: cbnz w8, .LBB48_1 +; CHECK-LDAPR-O0-NEXT: .LBB48_3: +; CHECK-LDAPR-O0-NEXT: .Lpcsection451: +; CHECK-LDAPR-O0-NEXT: and w8, w0, #0xffff +; CHECK-LDAPR-O0-NEXT: .Lpcsection452: +; CHECK-LDAPR-O0-NEXT: subs w8, w8, w1, uxth +; CHECK-LDAPR-O0-NEXT: .Lpcsection453: +; CHECK-LDAPR-O0-NEXT: cset w8, eq +; CHECK-LDAPR-O0-NEXT: and w1, w8, #0x1 +; CHECK-LDAPR-O0-NEXT: ret + %res = cmpxchg i16* %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0 + ret { i16, i1 } %res +} + +define internal double @bitcast_to_double(i64* %ptr) { +; CHECK-NOLSE-O1-LABEL: bitcast_to_double: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection210: +; CHECK-NOLSE-O1-NEXT: ldar x8, [x0] +; CHECK-NOLSE-O1-NEXT: fmov d0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: bitcast_to_double: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection454: +; CHECK-NOLSE-O0-NEXT: ldar x8, [x0] +; CHECK-NOLSE-O0-NEXT: fmov d0, x8 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: bitcast_to_double: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection210: +; CHECK-LDAPR-O1-NEXT: ldar x8, [x0] +; CHECK-LDAPR-O1-NEXT: fmov d0, x8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: bitcast_to_double: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection454: +; CHECK-LDAPR-O0-NEXT: ldar x8, [x0] +; CHECK-LDAPR-O0-NEXT: fmov d0, x8 +; CHECK-LDAPR-O0-NEXT: ret + %load = load atomic i64, i64* %ptr seq_cst, align 8, !pcsections !0 + %bitcast = bitcast i64 %load to double + ret double %bitcast +} + +define internal float @bitcast_to_float(i32* %ptr) { +; CHECK-NOLSE-O1-LABEL: bitcast_to_float: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection211: +; CHECK-NOLSE-O1-NEXT: ldar w8, [x0] +; CHECK-NOLSE-O1-NEXT: fmov s0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: bitcast_to_float: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection455: +; CHECK-NOLSE-O0-NEXT: ldar w8, [x0] +; CHECK-NOLSE-O0-NEXT: fmov s0, w8 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: bitcast_to_float: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection211: +; CHECK-LDAPR-O1-NEXT: ldar w8, [x0] +; CHECK-LDAPR-O1-NEXT: fmov s0, w8 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: bitcast_to_float: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection455: +; CHECK-LDAPR-O0-NEXT: ldar w8, [x0] +; CHECK-LDAPR-O0-NEXT: fmov s0, w8 +; CHECK-LDAPR-O0-NEXT: ret + %load = load atomic i32, i32* %ptr seq_cst, align 8, !pcsections !0 + %bitcast = bitcast i32 %load to float + ret float %bitcast +} + +define internal half @bitcast_to_half(i16* %ptr) { +; CHECK-NOLSE-O1-LABEL: bitcast_to_half: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection212: +; CHECK-NOLSE-O1-NEXT: ldarh w8, [x0] +; CHECK-NOLSE-O1-NEXT: fmov s0, w8 +; CHECK-NOLSE-O1-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: bitcast_to_half: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection456: +; CHECK-NOLSE-O0-NEXT: ldarh w8, [x0] +; CHECK-NOLSE-O0-NEXT: fmov s0, w8 +; CHECK-NOLSE-O0-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: bitcast_to_half: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection212: +; CHECK-LDAPR-O1-NEXT: ldarh w8, [x0] +; CHECK-LDAPR-O1-NEXT: fmov s0, w8 +; CHECK-LDAPR-O1-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: bitcast_to_half: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection456: +; CHECK-LDAPR-O0-NEXT: ldarh w8, [x0] +; CHECK-LDAPR-O0-NEXT: fmov s0, w8 +; CHECK-LDAPR-O0-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-LDAPR-O0-NEXT: ret + %load = load atomic i16, i16* %ptr seq_cst, align 8, !pcsections !0 + %bitcast = bitcast i16 %load to half + ret half %bitcast +} + +define internal i64* @inttoptr(i64* %ptr) { +; CHECK-NOLSE-O1-LABEL: inttoptr: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection213: +; CHECK-NOLSE-O1-NEXT: ldar x0, [x0] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: inttoptr: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection457: +; CHECK-NOLSE-O0-NEXT: ldar x0, [x0] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: inttoptr: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection213: +; CHECK-LDAPR-O1-NEXT: ldar x0, [x0] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: inttoptr: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection457: +; CHECK-LDAPR-O0-NEXT: ldar x0, [x0] +; CHECK-LDAPR-O0-NEXT: ret + %load = load atomic i64, i64* %ptr seq_cst, align 8, !pcsections !0 + %bitcast = inttoptr i64 %load to i64* + ret i64* %bitcast +} + +define internal i64* @load_ptr(i64** %ptr) { +; CHECK-NOLSE-O1-LABEL: load_ptr: +; CHECK-NOLSE-O1: // %bb.0: +; CHECK-NOLSE-O1-NEXT: .Lpcsection214: +; CHECK-NOLSE-O1-NEXT: ldar x0, [x0] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: load_ptr: +; CHECK-NOLSE-O0: // %bb.0: +; CHECK-NOLSE-O0-NEXT: .Lpcsection458: +; CHECK-NOLSE-O0-NEXT: ldar x0, [x0] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LDAPR-O1-LABEL: load_ptr: +; CHECK-LDAPR-O1: // %bb.0: +; CHECK-LDAPR-O1-NEXT: .Lpcsection214: +; CHECK-LDAPR-O1-NEXT: ldar x0, [x0] +; CHECK-LDAPR-O1-NEXT: ret +; +; CHECK-LDAPR-O0-LABEL: load_ptr: +; CHECK-LDAPR-O0: // %bb.0: +; CHECK-LDAPR-O0-NEXT: .Lpcsection458: +; CHECK-LDAPR-O0-NEXT: ldar x0, [x0] +; CHECK-LDAPR-O0-NEXT: ret + %load = load atomic i64*, i64** %ptr seq_cst, align 8, !pcsections !0 + ret i64* %load +} + +!0 = !{!"foo"} diff --git a/llvm/test/CodeGen/AArch64/pcsections.ll b/llvm/test/CodeGen/AArch64/pcsections.ll --- a/llvm/test/CodeGen/AArch64/pcsections.ll +++ b/llvm/test/CodeGen/AArch64/pcsections.ll @@ -81,7 +81,7 @@ ; CHECK-UNOPT-NEXT: stlxr ; CHECK-UNOPT: .Lpcsection9: ; CHECK-UNOPT-NEXT: cbnz -; CHECK-UNOPT: .Lpcsection12: +; CHECK-UNOPT: .Lpcsection13: ; CHECK-UNOPT-NEXT: b ; --- ; CHECK-NOT: .Lpcsection @@ -100,7 +100,7 @@ ; CHECK-NEXT: .Lpcsection_base7: ; DEFCM-NEXT: .word .Lpcsection5-.Lpcsection_base7 ; LARGE-NEXT: .xword .Lpcsection5-.Lpcsection_base7 -; CHECK-UNOPT: .word .Lpcsection12-.Lpcsection_base14 +; CHECK-UNOPT: .word .Lpcsection13-.Lpcsection_base15 ; CHECK-NEXT: .text entry: %0 = atomicrmw add i64* @foo, i64 1 monotonic, align 8, !pcsections !0