diff --git a/llvm/include/llvm/CodeGen/AtomicLoopBundler.h b/llvm/include/llvm/CodeGen/AtomicLoopBundler.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/AtomicLoopBundler.h
@@ -0,0 +1,110 @@
+//===--- AtomicLoopBundler.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass bundles the basic block created by AtomicExpand so that the
+/// Fast Register Allocator cannot insert spills between the exclusive load and
+/// store; such a spill clears the exclusive monitor and causes an infinite
+/// loop.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ATOMICLOOPBUNDLER_H
+#define LLVM_CODEGEN_ATOMICLOOPBUNDLER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+
+#define DEBUG_TYPE "atomic-loop-bundler"
+
+namespace llvm {
+
+/// Bundle the instructions between exclusive loads and stores that were
+/// inserted by the Atomic Expand pass.
+/// \param Derived must provide the following two predicate functions, which
+/// indicate when a machine instruction is a relevant load or store:
+///   static bool isExclusiveLoad(const MachineInstr &MI);
+///   static bool isExclusiveStore(const MachineInstr &MI);
+/// A bundle will be inserted in appropriate blocks between the first
+/// identified exclusive load and the next occurring exclusive store.
+template <typename Derived>
+class AtomicLoopBundler : public MachineFunctionPass {
+private:
+  bool bundleBlock(MachineBasicBlock &MBB) {
+    // One of the basic blocks inserted by AtomicExpandPass looks like this:
+    //   atomicrmw.start:
+    //     %loaded = @load.linked(%addr)
+    //     %new = some_op iN %loaded, %incr
+    //     %stored = @store_conditional(%new, %addr)
+    //     %try_again = icmp ne i32 %stored, 0
+    //     br i1 %try_again, label %loop, label %atomicrmw.end
+    if (!MBB.getName().contains("atomicrmw.start"))
+      return false;
+
+    // Search for the exclusive load.
+    MachineBasicBlock::instr_iterator LdIter = std::find_if(
+        MBB.instr_begin(), MBB.instr_end(), Derived::isExclusiveLoad);
+
+    // cmpxchg is expanded into a pseudo instruction CMP_SWAP_*. It can also be
+    // inserted by atomic loop expansion for floating point types. If we have a
+    // cmpxchg we won't see an exclusive load here, and don't need to do
+    // anything.
+    // FIXME: We could handle cmpxchg with bundles as well (remove the pseudos).
+    if (LdIter == MBB.instr_end())
+      return false;
+
+    // Check we haven't already bundled.
+    if (LdIter->isBundled())
+      return false;
+
+    // Search for the exclusive store.
+    MachineBasicBlock::instr_iterator StrIter =
+        std::find_if(LdIter, MBB.instr_end(), Derived::isExclusiveStore);
+
+    assert(StrIter != MBB.instr_end() &&
+           "Failed to find exclusive store in atomicrmw.start block");
+    if (StrIter == MBB.instr_end())
+      return false;
+
+    // Create a finalized bundle ready for register allocation.
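+    // finalizeBundle inserts a BUNDLE instruction in front of LdIter and adds
+    // the bundled instructions' register defs/uses to it as implicit operands,
+    // so passes that only visit top-level instructions (such as the fast
+    // register allocator) treat the whole exclusive sequence as a single unit.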
+    finalizeBundle(MBB, LdIter, std::next(StrIter));
+
+    // Print some info
+    LLVM_DEBUG(dbgs() << "Created bundle with "
+                      << std::distance(LdIter, StrIter)
+                      << " instructions between\n"
+                         "  "
+                      << *LdIter << "\n"
+                      << "  and " << *StrIter << ".\n";);
+    return true;
+  }
+
+public:
+  static char ID;
+
+  // The derived class passes its own pass ID so each target registers a
+  // distinct pass.
+  AtomicLoopBundler(char &ID) : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    // If the ISel pipeline failed, do not bother running this pass.
+    if (MF.getProperties().hasProperty(
+            MachineFunctionProperties::Property::FailedISel))
+      return false;
+
+    LLVM_DEBUG(dbgs() << "Bundle Atomic Loops for: " << MF.getName() << '\n');
+
+    bool Changed = false;
+    for (MachineFunction::iterator I = MF.begin(); I != MF.end(); ++I) {
+      Changed |= bundleBlock(*I);
+    }
+    return Changed;
+  }
+};
+
+} // End namespace llvm.
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -312,18 +312,18 @@
   return FrameIdx;
 }
 
-static bool dominates(MachineBasicBlock &MBB,
-                      MachineBasicBlock::const_iterator A,
-                      MachineBasicBlock::const_iterator B) {
-  auto MBBEnd = MBB.end();
+static bool dominates(const MachineBasicBlock &MBB,
+                      MachineBasicBlock::const_instr_iterator A,
+                      MachineBasicBlock::const_instr_iterator B) {
+  MachineBasicBlock::const_instr_iterator MBBEnd = MBB.instr_end();
   if (B == MBBEnd)
     return true;
 
-  MachineBasicBlock::const_iterator I = MBB.begin();
-  for (; &*I != A && &*I != B; ++I)
+  MachineBasicBlock::const_instr_iterator I = MBB.instr_begin();
+  for (; I != A && I != B; ++I)
     ;
 
-  return &*I == A;
+  return I == A;
 }
 
 /// Returns false if \p VirtReg is known to not live out of the current block.
@@ -1090,6 +1090,13 @@
   UsedInInstr.clear();
   BundleVirtRegsMap.clear();
 
+  // If a bundle contains a virtual register def followed by a use of another,
+  // they must not be allocated the same physical register. The alternative
+  // case, i.e. when the def is not followed by any use within the bundle, is
+  // probably uncommon enough to ignore for now. Hence we treat any def in a
+  // bundle like an early-clobber.
+  const bool IsBundle = MI.getOpcode() == TargetOpcode::BUNDLE;
+
   // Scan for special cases; Apply pre-assigned register defs to state.
   bool HasPhysRegUse = false;
   bool HasRegMask = false;
@@ -1110,6 +1117,8 @@
       }
       if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
         NeedToAssignLiveThroughs = true;
+      if (IsBundle)
+        NeedToAssignLiveThroughs = true;
     }
   } else if (Reg.isPhysical()) {
     if (!MRI->isReserved(Reg)) {
@@ -1208,7 +1217,7 @@
       LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
       unsigned Reg = MO.getReg();
       if (MO.isEarlyClobber() || MO.isTied() ||
-          (MO.getSubReg() && !MO.isUndef())) {
+          (MO.getSubReg() && !MO.isUndef()) || IsBundle) {
         defineLiveThroughVirtReg(MI, OpIdx, Reg);
       } else {
         defineVirtReg(MI, OpIdx, Reg);
@@ -1243,7 +1252,7 @@
     }
 
     // Do not free tied operands and early clobbers.
-    if (MO.isTied() || MO.isEarlyClobber())
+    if (MO.isTied() || MO.isEarlyClobber() || IsBundle)
       continue;
     Register Reg = MO.getReg();
     if (!Reg)
@@ -1333,10 +1342,10 @@
   }
 
   // Free early clobbers.
-  if (HasEarlyClobber) {
+  if (HasEarlyClobber || IsBundle) {
     for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
       MachineOperand &MO = MI.getOperand(I);
-      if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
+      if (!MO.isReg() || !MO.isDef() || !(MO.isEarlyClobber() || IsBundle))
         continue;
       // subreg defs don't free the full register. We left the subreg number
       // around as a marker in setPhysReg() to recognize this case here.
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -69,6 +69,7 @@
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
 void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64AtomicLoopBundlerPass(PassRegistry &);
 void initializeAArch64BranchTargetsPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64CondBrTuningPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.h b/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.h
@@ -0,0 +1,38 @@
+//===--- AArch64AtomicLoopBundler.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements AtomicLoopBundler for AArch64.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ATOMICLOOPBUNDLER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64ATOMICLOOPBUNDLER_H
+
+#include "llvm/CodeGen/AtomicLoopBundler.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class AArch64AtomicLoopBundler
+    : public AtomicLoopBundler<AArch64AtomicLoopBundler> {
+
+public:
+  static char ID;
+
+  static bool isExclusiveLoad(const MachineInstr &MI);
+  static bool isExclusiveStore(const MachineInstr &MI);
+
+  StringRef getPassName() const override {
+    return "AArch64 Atomic Loop Bundler";
+  }
+
+  AArch64AtomicLoopBundler()
+      : AtomicLoopBundler(ID) {}
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.cpp b/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64AtomicLoopBundler.cpp
@@ -0,0 +1,66 @@
+//===--- AArch64AtomicLoopBundler.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements AtomicLoopBundler for AArch64.
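+/// The isExclusiveLoad/isExclusiveStore predicates below enumerate the AArch64
+/// load-exclusive and store-exclusive opcodes that the pass recognises.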
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AtomicLoopBundler.h"
+#include "AArch64.h"
+
+using namespace llvm;
+
+char AArch64AtomicLoopBundler::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    AArch64AtomicLoopBundler, "aarch64-atomic-loop-bundler",
+    "Bundle exclusive loads and stores created by atomic loop expansion", false,
+    false)
+INITIALIZE_PASS_END(
+    AArch64AtomicLoopBundler, "aarch64-atomic-loop-bundler",
+    "Bundle exclusive loads and stores created by atomic loop expansion", false,
+    false)
+
+bool AArch64AtomicLoopBundler::isExclusiveLoad(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AArch64::LDAXRW:
+  case AArch64::LDAXRX:
+  case AArch64::LDAXRB:
+  case AArch64::LDAXRH:
+  case AArch64::LDXRW:
+  case AArch64::LDXRX:
+  case AArch64::LDXRB:
+  case AArch64::LDXRH:
+  case AArch64::LDAXPW:
+  case AArch64::LDAXPX:
+  case AArch64::LDXPW:
+  case AArch64::LDXPX:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool AArch64AtomicLoopBundler::isExclusiveStore(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AArch64::STLXRW:
+  case AArch64::STLXRX:
+  case AArch64::STLXRB:
+  case AArch64::STLXRH:
+  case AArch64::STXRW:
+  case AArch64::STXRX:
+  case AArch64::STXRB:
+  case AArch64::STXRH:
+  case AArch64::STLXPW:
+  case AArch64::STLXPX:
+  case AArch64::STXPW:
+  case AArch64::STXPX:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,6 +11,7 @@
 #include "AArch64TargetMachine.h"
 #include "AArch64.h"
+#include "AArch64AtomicLoopBundler.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64MacroFusion.h"
 #include "AArch64Subtarget.h"
@@ -175,6 +176,7 @@
   initializeAArch64A53Fix835769Pass(*PR);
   initializeAArch64A57FPLoadBalancingPass(*PR);
   initializeAArch64AdvSIMDScalarPass(*PR);
+  initializeAArch64AtomicLoopBundlerPass(*PR);
   initializeAArch64BranchTargetsPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
   initializeAArch64CompressJumpTablesPass(*PR);
@@ -426,6 +428,7 @@
   bool addRegBankSelect() override;
   void addPreGlobalInstructionSelect() override;
   bool addGlobalInstructionSelect() override;
+  void addFastRegAlloc() override;
   bool addILPOpts() override;
   void addPreRegAlloc() override;
   void addPostRegAlloc() override;
@@ -594,6 +597,22 @@
   return false;
 }
 
+void AArch64PassConfig::addFastRegAlloc() {
+  // Bundles must be finalized (register defs/uses added to the BUNDLE MI)
+  // before register allocation, because the register allocator looks only at
+  // top-level MachineInstrs, not at the contents of the bundle. However, this
+  // can't be done in SSA form, as it creates multiple definitions of virtual
+  // registers, which would fail machine verification. It must also be done
+  // after two-address instruction expansion, which removes REG_SEQUENCE.
+  insertPass(&TwoAddressInstructionPassID, &AArch64AtomicLoopBundler::ID);
+
+  TargetPassConfig::addFastRegAlloc();
+
+  // Remove the bundles created by AtomicLoopBundler; otherwise instructions
+  // inside the bundle will not be lowered correctly.
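+  // Passing a null predicate to createUnpackMachineBundles makes the
+  // unpacking pass run on every function.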
+  addPass(createUnpackMachineBundles(nullptr));
+}
+
 bool AArch64PassConfig::addILPOpts() {
   if (EnableCondOpt)
     addPass(createAArch64ConditionOptimizerPass());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -40,6 +40,7 @@
   AArch64A57FPLoadBalancing.cpp
   AArch64AdvSIMDScalarPass.cpp
   AArch64AsmPrinter.cpp
+  AArch64AtomicLoopBundler.cpp
   AArch64BranchTargets.cpp
   AArch64CallingConvention.cpp
   AArch64CleanupLocalDynamicTLSPass.cpp
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -62,6 +62,7 @@
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
+void initializeARMAtomicLoopBundlerPass(PassRegistry &);
 void initializeARMParallelDSPPass(PassRegistry &);
 void initializeARMLoadStoreOptPass(PassRegistry &);
 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/ARM/ARMAtomicLoopBundler.h b/llvm/lib/Target/ARM/ARMAtomicLoopBundler.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMAtomicLoopBundler.h
@@ -0,0 +1,34 @@
+//===--- ARMAtomicLoopBundler.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements AtomicLoopBundler for ARM.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMATOMICLOOPBUNDLER_H
+#define LLVM_LIB_TARGET_ARM_ARMATOMICLOOPBUNDLER_H
+
+#include "llvm/CodeGen/AtomicLoopBundler.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class ARMAtomicLoopBundler : public AtomicLoopBundler<ARMAtomicLoopBundler> {
+
+public:
+  static char ID;
+
+  static bool isExclusiveLoad(const MachineInstr &MI);
+  static bool isExclusiveStore(const MachineInstr &MI);
+
+  StringRef getPassName() const override { return "ARM Atomic Loop Bundler"; }
+
+  ARMAtomicLoopBundler() : AtomicLoopBundler(ID) {}
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/ARM/ARMAtomicLoopBundler.cpp b/llvm/lib/Target/ARM/ARMAtomicLoopBundler.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMAtomicLoopBundler.cpp
@@ -0,0 +1,60 @@
+//===--- ARMAtomicLoopBundler.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements AtomicLoopBundler for ARM.
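+/// The predicate functions below cover both the ARM and Thumb2 (t2) encodings
+/// of the exclusive load/store instructions.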
+//===----------------------------------------------------------------------===//
+
+#include "ARMAtomicLoopBundler.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+
+using namespace llvm;
+
+char ARMAtomicLoopBundler::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    ARMAtomicLoopBundler, "arm-atomic-loop-bundler",
+    "Bundle exclusive loads and stores created by atomic loop expansion", false,
+    false)
+INITIALIZE_PASS_END(
+    ARMAtomicLoopBundler, "arm-atomic-loop-bundler",
+    "Bundle exclusive loads and stores created by atomic loop expansion", false,
+    false)
+
+// TODO: Add the load-acquire/store-release exclusive instructions as well?
+bool ARMAtomicLoopBundler::isExclusiveLoad(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::t2LDREX:
+  case ARM::t2LDREXB:
+  case ARM::t2LDREXD:
+  case ARM::t2LDREXH:
+  case ARM::LDREX:
+  case ARM::LDREXB:
+  case ARM::LDREXD:
+  case ARM::LDREXH:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool ARMAtomicLoopBundler::isExclusiveStore(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::t2STREX:
+  case ARM::t2STREXB:
+  case ARM::t2STREXD:
+  case ARM::t2STREXH:
+  case ARM::STREX:
+  case ARM::STREXB:
+  case ARM::STREXD:
+  case ARM::STREXH:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,6 +11,7 @@
 #include "ARMTargetMachine.h"
 #include "ARM.h"
+#include "ARMAtomicLoopBundler.h"
 #include "ARMMacroFusion.h"
 #include "ARMSubtarget.h"
 #include "ARMTargetObjectFile.h"
@@ -88,6 +89,7 @@
   PassRegistry &Registry = *PassRegistry::getPassRegistry();
   initializeGlobalISel(Registry);
+  initializeARMAtomicLoopBundlerPass(Registry);
   initializeARMLoadStoreOptPass(Registry);
   initializeARMPreAllocLoadStoreOptPass(Registry);
   initializeARMParallelDSPPass(Registry);
@@ -364,6 +366,7 @@
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   bool addGlobalInstructionSelect() override;
+  void addFastRegAlloc() override;
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -491,6 +494,22 @@
   return false;
 }
 
+void ARMPassConfig::addFastRegAlloc() {
+  // Bundles must be finalized (register defs/uses added to the BUNDLE MI)
+  // before register allocation, because the register allocator looks only at
+  // top-level MachineInstrs, not at the contents of the bundle. However, this
+  // can't be done in SSA form, as it creates multiple definitions of virtual
+  // registers, which would fail machine verification. It must also be done
+  // after two-address instruction expansion, which removes REG_SEQUENCE.
+  insertPass(&TwoAddressInstructionPassID, &ARMAtomicLoopBundler::ID);
+
+  TargetPassConfig::addFastRegAlloc();
+
+  // Remove the bundles created by AtomicLoopBundler; otherwise instructions
+  // inside the bundle will not be lowered correctly.
+ addPass(createUnpackMachineBundles(nullptr)); +} + void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { addPass(createMVETPAndVPTOptimisationsPass()); diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(ARMCodeGen A15SDOptimizer.cpp ARMAsmPrinter.cpp + ARMAtomicLoopBundler.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMBasicBlockInfo.cpp diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -46,7 +46,9 @@ ; CHECK-NEXT: Local Stack Slot Allocation ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: AArch64 Atomic Loop Bundler ; CHECK-NEXT: Fast Register Allocator +; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Fixup Statepoint Caller Saved ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/AArch64/atomicrmw_exclusive_monitor.ll b/llvm/test/CodeGen/AArch64/atomicrmw_exclusive_monitor.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/atomicrmw_exclusive_monitor.ll @@ -0,0 +1,471 @@ +; RUN: llc -O0 -o - %s | FileCheck %s --check-prefix=CHECK +target triple = "aarch64-none-eabi" + +@atomic_i8 = external global i8 +@atomic_i16 = external global i16 +@atomic_i32 = external global i32 +@atomic_i64 = external global i64 + +@atomic_half = external global half +@atomic_float = external global float +@atomic_double = external global double + + +define i8 @test_xchg_i8() { +entry: + %0 = atomicrmw xchg i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_xchg_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_add_i8() { +entry: + %0 = atomicrmw add i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_add_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_sub_i8() { +entry: + %0 = atomicrmw sub i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_sub_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_and_i8() { +entry: + %0 = atomicrmw and i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_and_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_nand_i8() { +entry: + %0 = atomicrmw nand i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_nand_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_or_i8() { +entry: + %0 = atomicrmw or i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_or_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_xor_i8() { +entry: + %0 = atomicrmw xor i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_xor_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 
@test_max_i8() { +entry: + %0 = atomicrmw max i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_max_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_min_i8() { +entry: + %0 = atomicrmw min i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_min_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umax_i8() { +entry: + %0 = atomicrmw umax i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_umax_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umin_i8() { +entry: + %0 = atomicrmw umin i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_umin_i8: + ; CHECK: ldxrb {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrb {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} + + +define i16 @test_xchg_i16() { +entry: + %0 = atomicrmw xchg i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_xchg_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_add_i16() { +entry: + %0 = atomicrmw add i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_add_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_sub_i16() { +entry: + %0 = atomicrmw sub i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_sub_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_and_i16() { +entry: + %0 = atomicrmw and i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_and_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_nand_i16() { +entry: + %0 = atomicrmw nand i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_nand_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_or_i16() { +entry: + %0 = atomicrmw or i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_or_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_xor_i16() { +entry: + %0 = atomicrmw xor i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_xor_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_max_i16() { +entry: + %0 = atomicrmw max i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_max_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_min_i16() { +entry: + %0 = atomicrmw min i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_min_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umax_i16() { +entry: + %0 = atomicrmw umax i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_umax_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; 
CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umin_i16() { +entry: + %0 = atomicrmw umin i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_umin_i16: + ; CHECK: ldxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define half @test_fadd_half() { +entry: + %0 = atomicrmw fadd half* @atomic_half, half 1.0 monotonic + ; CHECK-LABEL: test_fadd_half: + ; CHECK: ldaxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret half %0 +} +define half @test_fsub_half() { +entry: + %0 = atomicrmw fsub half* @atomic_half, half 1.0 monotonic + ; CHECK-LABEL: test_fsub_half: + ; CHECK: ldaxrh {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret half %0 +} + + +define i32 @test_xchg_i32() { +entry: + %0 = atomicrmw xchg i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_xchg_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_add_i32() { +entry: + %0 = atomicrmw add i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_add_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_sub_i32() { +entry: + %0 = atomicrmw sub i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_sub_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_and_i32() { +entry: + %0 = atomicrmw and i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_and_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_nand_i32() { +entry: + %0 = atomicrmw nand i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_nand_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_or_i32() { +entry: + %0 = atomicrmw or i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_or_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_xor_i32() { +entry: + %0 = atomicrmw xor i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_xor_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_max_i32() { +entry: + %0 = atomicrmw max i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_max_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_min_i32() { +entry: + %0 = atomicrmw min i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_min_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_umax_i32() { +entry: + %0 = atomicrmw umax i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_umax_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_umin_i32() { +entry: + %0 = atomicrmw umin i32* @atomic_i32, i32 1 
monotonic + ; CHECK-LABEL: test_umin_i32: + ; CHECK: ldxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define float @test_fadd_float() { +entry: + %0 = atomicrmw fadd float* @atomic_float, float 1.0 monotonic + ; CHECK-LABEL: test_fadd_float: + ; CHECK: ldaxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret float %0 +} +define float @test_fsub_float() { +entry: + %0 = atomicrmw fsub float* @atomic_float, float 1.0 monotonic + ; CHECK-LABEL: test_fsub_float: + ; CHECK: ldaxr {{w[0-9]+}}, [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret float %0 +} + + + + +define i64 @test_xchg_i64() { +entry: + %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_xchg_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_add_i64() { +entry: + %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_add_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_sub_i64() { +entry: + %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_sub_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_and_i64() { +entry: + %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_and_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_nand_i64() { +entry: + %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_nand_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_or_i64() { +entry: + %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_or_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_xor_i64() { +entry: + %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_xor_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_max_i64() { +entry: + %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_max_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_min_i64() { +entry: + %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_min_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_umax_i64() { +entry: + %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_umax_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_umin_i64() { +entry: + %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_umin_i64: + ; CHECK: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str 
+ ; CHECK: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define double @test_fadd_double() { +entry: + %0 = atomicrmw fadd double* @atomic_double, double 1.0 monotonic + ; CHECK-LABEL: test_fadd_double: + ; CHECK: ldaxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret double %0 +} +define double @test_fsub_double() { +entry: + %0 = atomicrmw fsub double* @atomic_double, double 1.0 monotonic + ; CHECK-LABEL: test_fsub_double: + ; CHECK: ldaxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; CHECK: stlxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret double %0 +} diff --git a/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir --- a/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir +++ b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir @@ -11,10 +11,10 @@ body: | bb.0: ; GCN-LABEL: name: fast_regalloc_bundle_handling - ; GCN: renamable $vgpr0 = IMPLICIT_DEF ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr0, implicit killed renamable $vgpr1, implicit $exec { - ; GCN: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + ; GCN: renamable $vgpr2 = IMPLICIT_DEF + ; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr1, implicit killed renamable $vgpr2, implicit $exec { + ; GCN: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr2, implicit $exec ; GCN: } ; GCN: S_ENDPGM 0, implicit killed renamable $vgpr0 %0 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll @@ -0,0 +1,381 @@ +; RUN: llc -O0 -o - %s | FileCheck %s --check-prefix=CHECK +target triple = "armv7-none-eabi" + +@atomic_i8 = external global i8 +@atomic_i16 = external global i16 +@atomic_i32 = external global i32 +@atomic_i64 = external global i64 + +@atomic_half = external global half +@atomic_float = external global float +@atomic_double = external global double + + +define i8 @test_xchg_i8() { +entry: + %0 = atomicrmw xchg i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_xchg_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_add_i8() { +entry: + %0 = atomicrmw add i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_add_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_sub_i8() { +entry: + %0 = atomicrmw sub i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_sub_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_and_i8() { +entry: + %0 = atomicrmw and i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_and_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_nand_i8() { +entry: + %0 = atomicrmw nand i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_nand_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_or_i8() { +entry: + %0 = atomicrmw or i8* 
@atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_or_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_xor_i8() { +entry: + %0 = atomicrmw xor i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_xor_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_max_i8() { +entry: + %0 = atomicrmw max i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_max_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_min_i8() { +entry: + %0 = atomicrmw min i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_min_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umax_i8() { +entry: + %0 = atomicrmw umax i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_umax_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umin_i8() { +entry: + %0 = atomicrmw umin i8* @atomic_i8, i8 1 monotonic + ; CHECK-LABEL: test_umin_i8: + ; CHECK: ldrexb {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexb {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i8 %0 +} + + +define i16 @test_xchg_i16() { +entry: + %0 = atomicrmw xchg i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_xchg_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_add_i16() { +entry: + %0 = atomicrmw add i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_add_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_sub_i16() { +entry: + %0 = atomicrmw sub i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_sub_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_and_i16() { +entry: + %0 = atomicrmw and i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_and_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_nand_i16() { +entry: + %0 = atomicrmw nand i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_nand_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_or_i16() { +entry: + %0 = atomicrmw or i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_or_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_xor_i16() { +entry: + %0 = atomicrmw xor i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_xor_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_max_i16() { +entry: + %0 = atomicrmw 
max i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_max_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_min_i16() { +entry: + %0 = atomicrmw min i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_min_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umax_i16() { +entry: + %0 = atomicrmw umax i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_umax_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umin_i16() { +entry: + %0 = atomicrmw umin i16* @atomic_i16, i16 1 monotonic + ; CHECK-LABEL: test_umin_i16: + ; CHECK: ldrexh {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexh {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i16 %0 +} + + +define i32 @test_xchg_i32() { +entry: + %0 = atomicrmw xchg i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_xchg_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_add_i32() { +entry: + %0 = atomicrmw add i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_add_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_sub_i32() { +entry: + %0 = atomicrmw sub i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_sub_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_and_i32() { +entry: + %0 = atomicrmw and i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_and_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_nand_i32() { +entry: + %0 = atomicrmw nand i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_nand_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_or_i32() { +entry: + %0 = atomicrmw or i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_or_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_xor_i32() { +entry: + %0 = atomicrmw xor i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_xor_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_max_i32() { +entry: + %0 = atomicrmw max i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_max_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_min_i32() { +entry: + %0 = atomicrmw min i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_min_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 
@test_umax_i32() { +entry: + %0 = atomicrmw umax i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_umax_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_umin_i32() { +entry: + %0 = atomicrmw umin i32* @atomic_i32, i32 1 monotonic + ; CHECK-LABEL: test_umin_i32: + ; CHECK: ldrex {{r[0-9]+|lr}}, [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strex {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i32 %0 +} + + + + +define i64 @test_xchg_i64() { +entry: + %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_xchg_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_add_i64() { +entry: + %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_add_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_sub_i64() { +entry: + %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_sub_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_and_i64() { +entry: + %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_and_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_nand_i64() { +entry: + %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_nand_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_or_i64() { +entry: + %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_or_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_xor_i64() { +entry: + %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic + ; CHECK-LABEL: test_xor_i64: + ; CHECK: ldrexd {{r[0-9]+|lr}}, [[RB:r[0-9]+]], [[ADDR:.(r[0-9]+|lr).]] + ; CHECK-NOT: str + ; CHECK: strexd {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}, [[ADDR]] + ret i64 %0 +}