Index: include/llvm/IR/Instruction.h
===================================================================
--- include/llvm/IR/Instruction.h
+++ include/llvm/IR/Instruction.h
@@ -333,6 +333,11 @@
     return mayReadFromMemory() || mayWriteToMemory();
   }
 
+  /// isAtomic - Return true if this instruction has an
+  /// AtomicOrdering of unordered or higher.
+  ///
+  bool isAtomic() const;
+
   /// mayThrow - Return true if this instruction may throw an exception.
   ///
   bool mayThrow() const;
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -241,7 +241,6 @@
             (xthread << 6));
   }
 
-  bool isAtomic() const { return getOrdering() != NotAtomic; }
   void setAtomic(AtomicOrdering Ordering,
                  SynchronizationScope SynchScope = CrossThread) {
     setOrdering(Ordering);
@@ -361,7 +360,6 @@
             (xthread << 6));
   }
 
-  bool isAtomic() const { return getOrdering() != NotAtomic; }
   void setAtomic(AtomicOrdering Ordering,
                  SynchronizationScope SynchScope = CrossThread) {
     setOrdering(Ordering);
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -31,6 +31,7 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Target/TargetCallingConv.h"
@@ -960,7 +961,7 @@
   /// It is called by AtomicExpandPass before expanding an
   /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here
+  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
   virtual void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                 bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
@@ -970,20 +971,30 @@
   /// It is called by AtomicExpandPass after expanding an
   /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here
+  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
   virtual void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                  bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
   }
 
-  /// Return true if the given (atomic) instruction should be expanded by the
-  /// IR-level AtomicExpand pass into a loop involving
-  /// load-linked/store-conditional pairs. Atomic stores will be expanded in the
-  /// same way as "atomic xchg" operations which ignore their output if needed.
-  virtual bool shouldExpandAtomicInIR(Instruction *Inst) const {
+  /// Returns true if the given (atomic) store should be expanded by the
+  /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its output.
+  virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
     return false;
   }
 
+  /// Returns true if the given (atomic) load should be expanded by the
+  /// IR-level AtomicExpand pass into a load-linked instruction
+  /// (through emitLoadLinked()).
+  virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+    return false;
+  }
+
+  /// Returns true if the given AtomicRMW should be expanded by the
+  /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
+  virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
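The split lets each backend override only the predicates it cares about, with the statically-typed instruction already in hand. As a hedged sketch of what adoption looks like for a hypothetical out-of-tree target (the MyTargetLowering name and the 64-bit limit are assumptions for illustration, not part of this patch):

// Expand RMW operations up to the widest LL/SC width this target can emit;
// anything larger falls back to the default __atomic_* libcall lowering.
bool MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
  return RMWI->getType()->getPrimitiveSizeInBits() <= 64;
}

// Plain loads and stores are assumed already atomic at supported widths,
// so no IR-level expansion is requested for them.
bool MyTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  return false;
}
bool MyTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  return false;
}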
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
@@ -38,10 +39,10 @@
   }
 
   bool runOnFunction(Function &F) override;
-  bool expandAtomicInsts(Function &F);
 
+private:
   bool expandAtomicLoad(LoadInst *LI);
-  bool expandAtomicStore(StoreInst *LI);
+  bool expandAtomicStore(StoreInst *SI);
   bool expandAtomicRMW(AtomicRMWInst *AI);
   bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
 };
@@ -60,37 +61,37 @@
 bool AtomicExpand::runOnFunction(Function &F) {
   if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
     return false;
+  auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
 
   SmallVector<Instruction *, 1> AtomicInsts;
 
   // Changing control-flow while iterating through it is a bad idea, so gather a
   // list of all atomic instructions before we start.
-  for (BasicBlock &BB : F)
-    for (Instruction &Inst : BB) {
-      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
-          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
-          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
-        AtomicInsts.push_back(&Inst);
-    }
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+    if (I->isAtomic())
+      AtomicInsts.push_back(&*I);
+  }
 
   bool MadeChange = false;
-  for (Instruction *Inst : AtomicInsts) {
-    if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR(
-            Inst))
-      continue;
+  for (auto I : AtomicInsts) {
+    auto LI = dyn_cast<LoadInst>(I);
+    auto SI = dyn_cast<StoreInst>(I);
+    auto RMWI = dyn_cast<AtomicRMWInst>(I);
+    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
 
-    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
-      MadeChange |= expandAtomicRMW(AI);
-    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
-      MadeChange |= expandAtomicCmpXchg(CI);
-    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
+           "Unknown atomic instruction");
+
+    if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
       MadeChange |= expandAtomicLoad(LI);
-    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
       MadeChange |= expandAtomicStore(SI);
-    else
-      llvm_unreachable("Unknown atomic instruction");
+    } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
+      MadeChange |= expandAtomicRMW(RMWI);
+    } else if (CASI) {
+      MadeChange |= expandAtomicCmpXchg(CASI);
+    }
   }
-
   return MadeChange;
 }
@@ -145,7 +146,7 @@
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
-  // If getInsertFencesForAtomic() return true, then the target does not want to
+  // If getInsertFencesForAtomic() returns true, then the target does not want to
   // deal with memory orders, and emitLeading/TrailingFence should take care of
   // everything. Otherwise, emitLeading/TrailingFence are no-op and we should
   // preserve the ordering.
@@ -251,7 +252,7 @@
   BasicBlock *BB = CI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
-  // If getInsertFencesForAtomic() return true, then the target does not want to
+  // If getInsertFencesForAtomic() returns true, then the target does not want to
   // deal with memory orders, and emitLeading/TrailingFence should take care of
   // everything. Otherwise, emitLeading/TrailingFence are no-op and we should
   // preserve the ordering.
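For targets that do set getInsertFencesForAtomic(), the pass strips the ordering down to monotonic and relies entirely on the two fence callbacks. One plausible shape for the leading-fence side, loosely modeled on the ARM DMB mapping (the MyTargetLowering name is again hypothetical, and the exact ordering-to-fence mapping is an assumption):

// A leading barrier is only needed before stores with release semantics
// (RMW and CmpXchg pass IsStore = true); acquire orderings are handled by
// emitTrailingFence after the access instead.
void MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                        bool IsStore, bool IsLoad) const {
  if (IsStore &&
      (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent))
    Builder.CreateFence(Release);
}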
Index: lib/IR/Instruction.cpp
===================================================================
--- lib/IR/Instruction.cpp
+++ lib/IR/Instruction.cpp
@@ -438,6 +438,20 @@
   }
 }
 
+bool Instruction::isAtomic() const {
+  switch (getOpcode()) {
+  default: return false;
+  case Instruction::AtomicCmpXchg:
+  case Instruction::AtomicRMW:
+  case Instruction::Fence:
+    return true;
+  case Instruction::Load:
+    return cast<LoadInst>(this)->getOrdering() != NotAtomic;
+  case Instruction::Store:
+    return cast<StoreInst>(this)->getOrdering() != NotAtomic;
+  }
+}
+
 bool Instruction::mayThrow() const {
   if (const CallInst *CI = dyn_cast<CallInst>(this))
     return !CI->doesNotThrow();
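With the predicate on Instruction itself, client code no longer needs to enumerate the atomic instruction classes by hand, as the old gathering loop in AtomicExpandPass did. A minimal sketch of a consumer (the countAtomicOps helper is illustrative, not part of this patch):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
using namespace llvm;

// Counts cmpxchg, atomicrmw, fence, and any load/store whose ordering is
// unordered or stronger; plain (NotAtomic) loads and stores are skipped.
static unsigned countAtomicOps(Function &F) {
  unsigned Count = 0;
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (I->isAtomic())
      ++Count;
  return Count;
}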
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -323,7 +323,9 @@
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                               Value *Addr, AtomicOrdering Ord) const override;
 
-  bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8122,19 +8122,6 @@
   }
 }
 
-bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
-  // Loads and stores less than 128-bits are already atomic; ones above that
-  // are doomed anyway, so defer to the default libcall and blame the OS when
-  // things go wrong:
-  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
-  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-    return LI->getType()->getPrimitiveSizeInBits() == 128;
-
-  // For the real atomic operations, we have ldxr/stxr up to 128 bits.
-  return Inst->getType()->getPrimitiveSizeInBits() <= 128;
-}
-
 bool AArch64TargetLowering::useLoadStackGuardNode() const {
   return true;
 }
@@ -8151,6 +8138,28 @@
   return TargetLoweringBase::getPreferredVectorAction(VT);
 }
 
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+  return Size == 128;
+}
+
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+  return Size == 128;
+}
+
+// For the real atomic operations, we have ldxr/stxr up to 128 bits.
+bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  return Size <= 128;
+}
+
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                              AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -402,7 +402,9 @@
   void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                          bool IsStore, bool IsLoad) const override;
 
-  bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -11059,23 +11059,29 @@
   }
 }
 
-bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
-  // Loads and stores less than 64-bits are already atomic; ones above that
-  // are doomed anyway, so defer to the default libcall and blame the OS when
-  // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
-  // anything for those.
-  bool IsMClass = Subtarget->isMClass();
-  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-    unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
-    return Size == 64 && !IsMClass;
-  } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-    return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
-  }
-
-  // For the real atomic operations, we have ldrex/strex up to 32 bits,
-  // and up to 64 bits on the non-M profiles
-  unsigned AtomicLimit = IsMClass ? 32 : 64;
-  return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+  return (Size == 64) && !Subtarget->isMClass();
+}
+
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+  return (Size == 64) && !Subtarget->isMClass();
+}
+
+// For the real atomic operations, we have ldrex/strex up to 32 bits,
+// and up to 64 bits on the non-M profiles.
+bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  return Size <= (Subtarget->isMClass() ? 32 : 64);
 }
 
 // This has so far only been implemented for MachO.
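Taken together, the ARM policy above boils down to a small decision table; a standalone predicate capturing it (an illustration only, not LLVM API):

// Does AtomicExpand emit an ldrex/strex loop for an atomic operation of the
// given width on ARM? IsRMW selects the atomicrmw rule; otherwise the
// load/store rule applies (cmpxchg is always expanded by the pass).
bool expandsToLLSCOnARM(unsigned SizeInBits, bool IsMClass, bool IsRMW) {
  if (IsRMW)
    return SizeInBits <= (IsMClass ? 32u : 64u); // ldrex/strex, ldrexd/strexd
  // Atomic loads/stores only need expansion at 64 bits, and Cortex-M lacks
  // ldrexd/strexd entirely, so those fall back to the default libcalls.
  return SizeInBits == 64 && !IsMClass;
}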