Index: lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp =================================================================== --- lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -84,6 +84,9 @@ DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n"); continue; } + for (const MachineMemOperand *MMO : MI.memoperands()) { + if (MMO->isAtomic()) break; + } const MCInstrDesc &Desc = MI.getDesc(); for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -201,7 +201,7 @@ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); - void SelectCMP_SWAP(SDNode *N); + bool SelectCMP_SWAP(SDNode *N); }; } // end anonymous namespace @@ -2609,9 +2609,13 @@ } /// We've got special pseudo-instructions for these -void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { +bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; EVT MemTy = cast(N)->getMemoryVT(); + + // Leave IR for LSE if subtarget supports it. + if (Subtarget->hasLSE()) return false; + if (MemTy == MVT::i8) Opcode = AArch64::CMP_SWAP_8; else if (MemTy == MVT::i16) @@ -2637,6 +2641,8 @@ ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); CurDAG->RemoveDeadNode(N); + + return true; } void AArch64DAGToDAGISel::Select(SDNode *Node) { @@ -2660,8 +2666,9 @@ break; case ISD::ATOMIC_CMP_SWAP: - SelectCMP_SWAP(Node); - return; + if (SelectCMP_SWAP(Node)) + return; + break; case ISD::READ_REGISTER: if (tryReadRegister(Node)) Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10566,7 +10566,14 @@ TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; + if (Size > 128) return AtomicExpansionKind::None; + // Nand not supported in LSE. + if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; + // Currently leaving And and Sub to LLSC + if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub)) + return AtomicExpansionKind::LLSC; + // Leave 128 bits to LLSC. + return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC; } bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( @@ -10576,6 +10583,7 @@ // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + if (Subtarget->hasLSE()) return false; return getTargetMachine().getOptLevel() != 0; } Index: lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- lib/Target/AArch64/AArch64InstrAtomics.td +++ lib/Target/AArch64/AArch64InstrAtomics.td @@ -405,3 +405,49 @@ (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, GPR64:$newLo, GPR64:$newHi), []>, Sched<[WriteAtomic]>; + +// v8.1 Atomic instructions: +def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rs, GPR32:$Rt), (CASALb GPR32:$Rt, GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rs, GPR32:$Rt), (CASALh GPR32:$Rt, GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rs, GPR32:$Rt), (CASALs GPR32:$Rt, GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rs, GPR64:$Rt), (CASALd GPR64:$Rt, GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>; Index: test/CodeGen/AArch64/atomic-ops-lse.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/atomic-ops-lse.ll @@ -0,0 +1,493 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=thunderx2t99 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=thunderx2t99 < %s | FileCheck %s --check-prefix=CHECK-REG + +; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created +; (i.e. reusing a register for status & data in store exclusive). +; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], w[[NEW]], [x{{[0-9]+}}] +; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], x[[NEW]], [x{{[0-9]+}}] + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i8: + %old = atomicrmw add i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i16: + %old = atomicrmw add i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i32: + %old = atomicrmw add i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i64: + %old = atomicrmw add i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i8: + %old = atomicrmw or i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i16: + %old = atomicrmw or i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i32: + %old = atomicrmw or i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i64: + %old = atomicrmw or i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i8: + %old = atomicrmw xor i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i16: + %old = atomicrmw xor i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i32: + %old = atomicrmw xor i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i64: + %old = atomicrmw xor i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i8: + %old = atomicrmw min i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i16: + %old = atomicrmw min i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i32: + %old = atomicrmw min i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i64: + %old = atomicrmw min i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i8: + %old = atomicrmw umin i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i16: + %old = atomicrmw umin i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i32: + %old = atomicrmw umin i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i64: + %old = atomicrmw umin i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i8: + %old = atomicrmw max i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i16: + %old = atomicrmw max i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i32: + %old = atomicrmw max i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i64: + %old = atomicrmw max i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i8: + %old = atomicrmw umax i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i16: + %old = atomicrmw umax i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i32: + %old = atomicrmw umax i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i64: + %old = atomicrmw umax i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i8: + %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i16: + %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i32: + %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i64: + %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8: + %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %old = extractvalue { i8, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: casalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] + + ret i8 %old +} + +define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16: + %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire + %old = extractvalue { i16, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: casalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] + + ret i16 %old +} + +define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i32: + %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire + %old = extractvalue { i32, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 + +; CHECK: casal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] + + ret i32 %old +} + +define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i64: + %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new acquire acquire + %old = extractvalue { i64, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 + +; CHECK: casal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] + + ret i64 %old +} +