Index: llvm/trunk/docs/Atomics.rst
===================================================================
--- llvm/trunk/docs/Atomics.rst
+++ llvm/trunk/docs/Atomics.rst
@@ -446,7 +446,7 @@
 atomic constructs. Here are some lowerings it can do:
 
 * cmpxchg -> loop with load-linked/store-conditional
-  by overriding ``hasLoadLinkedStoreConditional()``, ``emitLoadLinked()``,
+  by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``,
   ``emitStoreConditional()``
 * large loads/stores -> ll-sc/cmpxchg
   by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()``
Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -124,15 +124,14 @@
                           // mask (ex: x86 blends).
   };
 
-  /// Enum that specifies what a AtomicRMWInst is expanded to, if at all. Exists
-  /// because different targets have different levels of support for these
-  /// atomic RMW instructions, and also have different options w.r.t. what they
-  /// should expand to.
+  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
+  /// to, if at all. Exists because different targets have different levels of
+  /// support for these atomic instructions, and also have different options
+  /// w.r.t. what they should expand to.
   enum class AtomicExpansionKind {
     None,    // Don't expand the instruction.
     LLSC,    // Expand the instruction into loadlinked/storeconditional; used
-             // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional`
-             // returns true.
+             // by ARM/AArch64.
     CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
   };
 
@@ -1027,10 +1026,6 @@
   /// \name Helpers for atomic expansion.
   /// @{
 
-  /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
-  /// and expand AtomicCmpXchgInst.
-  virtual bool hasLoadLinkedStoreConditional() const { return false; }
-
   /// Perform a load-linked operation on Addr, returning a "Value *" with the
   /// corresponding pointee type. This may entail some non-trivial operations to
   /// truncate or reconstruct types that will be illegal in the backend. See
@@ -1111,12 +1106,20 @@
   /// Returns true if arguments should be sign-extended in lib calls.
   virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
     return IsSigned;
-  }
+  }
 
-  /// Returns true if the given (atomic) load should be expanded by the
-  /// IR-level AtomicExpand pass into a load-linked instruction
-  /// (through emitLoadLinked()).
-  virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
+  /// Returns how the given (atomic) load should be expanded by the
+  /// IR-level AtomicExpand pass.
+  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+    return AtomicExpansionKind::None;
+  }
+
+  /// Returns true if the given atomic cmpxchg should be expanded by the
+  /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
+  /// (through emitLoadLinked() and emitStoreConditional()).
+  virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+    return false;
+  }
 
   /// Returns how the IR-level AtomicExpand pass should expand the given
   /// AtomicRMW, if at all. Default is to never expand.
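
For illustration, a backend would adopt the reworked hooks roughly as follows. This is a minimal sketch: MyTargetLowering and the 64-bit cutoff are invented for the example; the in-tree analogues are the ARM/AArch64/Hexagon/X86 overrides later in this patch.

    // Hypothetical target lowering, sketched only to show the new hook
    // signatures; "MyTargetLowering" and the 64-bit cutoff are illustrative.
    class MyTargetLowering : public TargetLowering {
    public:
      // Loads wider than the native atomic width get expanded in IR: LLSC
      // routes them through emitLoadLinked(), CmpXChg through a cmpxchg loop.
      AtomicExpansionKind
      shouldExpandAtomicLoadInIR(LoadInst *LI) const override {
        return LI->getType()->getPrimitiveSizeInBits() > 64
                   ? AtomicExpansionKind::LLSC
                   : AtomicExpansionKind::None;
      }
      // Returning true asks AtomicExpandPass to lower cmpxchg into a
      // load-linked/store-conditional retry loop built from the target's
      // emitLoadLinked() and emitStoreConditional() callbacks.
      bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override {
        return true;
      }
    };
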
Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
@@ -46,7 +46,7 @@
   private:
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
-    bool expandAtomicLoad(LoadInst *LI);
+    bool tryExpandAtomicLoad(LoadInst *LI);
    bool expandAtomicLoadToLL(LoadInst *LI);
    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
    bool expandAtomicStore(StoreInst *SI);
@@ -109,7 +109,7 @@
       FenceOrdering = RMWI->getOrdering();
       RMWI->setOrdering(Monotonic);
       IsStore = IsLoad = true;
-    } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+    } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                (isAtLeastRelease(CASI->getSuccessOrdering()) ||
                 isAtLeastAcquire(CASI->getSuccessOrdering()))) {
       // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -127,8 +127,8 @@
       }
     }
 
-    if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
-      MadeChange |= expandAtomicLoad(LI);
+    if (LI) {
+      MadeChange |= tryExpandAtomicLoad(LI);
     } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
       MadeChange |= expandAtomicStore(SI);
     } else if (RMWI) {
@@ -142,7 +142,7 @@
       } else {
         MadeChange |= tryExpandAtomicRMW(RMWI);
       }
-    } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+    } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
   }
@@ -170,11 +170,18 @@
   return (LeadingFence || TrailingFence);
 }
 
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
     return expandAtomicLoadToLL(LI);
-  else
+  }
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
     return expandAtomicLoadToCmpXchg(LI);
+  }
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }
 
 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -243,11 +250,6 @@
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    assert(TLI->hasLoadLinkedStoreConditional() &&
-           "TargetLowering requested we expand AtomicRMW instruction into "
-           "load-linked/store-conditional combos, but such instructions aren't "
-           "supported");
-
     return expandAtomicRMWToLLSC(AI);
   }
   case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
@@ -503,11 +505,8 @@
 }
 
 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
-  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
-    if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
-      expandAtomicLoad(ResultingLoad);
-    return true;
-  }
+  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI))
+    return tryExpandAtomicLoad(ResultingLoad);
   return false;
 }
 
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
@@ -343,17 +343,19 @@
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
 
-  bool hasLoadLinkedStoreConditional() const override;
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                               Value *Addr, AtomicOrdering Ord) const override;
 
-  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
   bool useLoadStackGuardNode() const override;
 
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(EVT VT) const override;
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9492,19 +9492,21 @@
 // Loads and stores less than 128-bits are already atomic; ones above that
 // are doomed anyway, so defer to the default libcall and blame the OS when
 // things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return Size == 128;
+  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-TargetLoweringBase::AtomicExpansionKind
+TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
-bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
   return true;
 }
 
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -415,7 +415,6 @@
     bool functionArgumentNeedsConsecutiveRegisters(
         Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
 
-    bool hasLoadLinkedStoreConditional() const override;
     Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
     Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                           AtomicOrdering Ord) const override;
@@ -436,10 +435,12 @@
     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                unsigned Factor) const override;
 
-    bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
 
     bool useLoadStackGuardNode() const override;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -11430,8 +11430,6 @@
   return true;
 }
 
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
 Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
                                         ARM_MB::MemBOpt Domain) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11527,14 +11525,16 @@
 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
 // guarantee, see DDI0406C ARM architecture reference manual,
 // sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return (Size == 64) && !Subtarget->isMClass();
+  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLSC
+                                                  : AtomicExpansionKind::None;
 }
 
 // For the real atomic operations, we have ldrex/strex up to 32 bits,
 // and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicExpansionKind
+TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   return (Size <= (Subtarget->isMClass() ? 32U : 64U))
@@ -11542,6 +11542,11 @@
              : AtomicExpansionKind::None;
 }
 
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
+  return true;
+}
+
 // This has so far only been implemented for MachO.
 bool ARMTargetLowering::useLoadStackGuardNode() const {
   return Subtarget->isTargetMachO();
Index: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
@@ -209,14 +209,11 @@
   bool isLegalICmpImmediate(int64_t Imm) const override;
 
   // Handling of atomic RMW instructions.
-  bool hasLoadLinkedStoreConditional() const override {
-    return true;
-  }
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                               Value *Addr, AtomicOrdering Ord) const override;
-  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
   AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
Index: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2499,9 +2499,12 @@
   return Ext;
 }
 
-bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   // Do not expand loads and stores that don't exceed 64 bits.
-  return LI->getType()->getPrimitiveSizeInBits() > 64;
+  return LI->getType()->getPrimitiveSizeInBits() > 64
+             ? AtomicExpansionKind::LLSC
+             : AtomicExpansionKind::None;
 }
 
 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1053,7 +1053,8 @@
 
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
-    bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -18406,12 +18406,14 @@
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
-  return needsCmpXchgNb(PTy->getElementType());
+  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
+                                               : AtomicExpansionKind::None;
 }
 
-TargetLoweringBase::AtomicExpansionKind
+TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
   Type *MemType = AI->getType();
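
As context for the cmpxchg side of the change: when shouldExpandAtomicCmpXchgInIR() returns true, AtomicExpandPass's expandAtomicCmpXchg() builds a load-linked/store-conditional retry loop out of the two target callbacks whose signatures appear in this patch. A much-simplified sketch of that shape follows; the real code also creates the loop/success/failure basic blocks, weak-cmpxchg handling, and fence placement, and the names Addr, Cmp, New, and MemOpOrder are illustrative.

    // Simplified shape of the LL/SC expansion of "cmpxchg Addr, Cmp, New";
    // control flow between the generated blocks is elided into comments.
    Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); // e.g. ldrex
    Value *ShouldStore = Builder.CreateICmpEQ(Loaded, Cmp);
    // ...branch on ShouldStore: if unequal, the cmpxchg fails with Loaded...
    Value *StoreFailed =
        TLI->emitStoreConditional(Builder, New, Addr, MemOpOrder);  // e.g. strex
    // ...if StoreFailed is non-zero, another agent wrote the location since the
    // load-linked: branch back to the load-linked block and retry; otherwise
    // the cmpxchg succeeded with value Loaded...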