Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -123,6 +123,18 @@ // mask (ex: x86 blends). }; + /// Enum that specifies what a AtomicRMWInst is expanded to, if at all. Exists + /// because different targets have different levels of support for these + /// atomic RMW instructions, and also have different options w.r.t. what they should + /// expand to. + enum class AtomicRMWExpansionKind { + None, // Don't expand the instruction. + LLSC, // Expand the instruction into loadlinked/storeconditional; used + // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional` + // returns true. + CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. + }; + static ISD::NodeType getExtendForContent(BooleanContent Content) { switch (Content) { case UndefinedBooleanContent: @@ -1070,10 +1082,11 @@ /// (through emitLoadLinked()). virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; } - /// Returns true if the given AtomicRMW should be expanded by the - /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional. - virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const { - return false; + /// Returns how the IR-level AtomicExpand pass should expand the given + /// AtomicRMW, if at all. Default is to never expand. + virtual AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const { + (void)RMWI; + return AtomicRMWExpansionKind::None; } /// On some platforms, an AtomicRMW that never actually modifies the value Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -48,7 +48,7 @@ bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); - bool expandAtomicRMW(AtomicRMWInst *AI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -135,9 +135,12 @@ // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. - MadeChange |= - (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) || - (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI)); + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } @@ -211,7 +214,7 @@ // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. - // It is the responsibility of the target to only return true in + // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = @@ -220,14 +223,25 @@ SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); + return tryExpandAtomicRMW(AI); } -bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TLI->hasLoadLinkedStoreConditional()) +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicRMWExpansionKind::None: + return false; + case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { + assert(TLI->hasLoadLinkedStoreConditional() && + "TargetLowering requested we expand AtomicRMW instruction into " + "load-linked/store-conditional combos, but such instructions aren't " + "supported"); + return expandAtomicRMWToLLSC(AI); - else + } + case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { return expandAtomicRMWToCmpXchg(AI); + } + }; } /// Emit IR to implement the given atomicrmw operation on values in registers, Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -336,7 +336,8 @@ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; TargetLoweringBase::LegalizeTypeAction Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8772,9 +8772,11 @@ } // For the real atomic operations, we have ldxr/stxr up to 128 bits, -bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128; + return Size <= 128 ? + AtomicRMWExpansionKind::LLSC : AtomicRMWExpansionKind::None; } bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const { Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -407,7 +407,8 @@ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -11100,9 +11100,11 @@ // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles -bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= (Subtarget->isMClass() ? 32U : 64U); + return (Size <= (Subtarget->isMClass() ? 32U : 64U)) ? + AtomicRMWExpansionKind::LLSC : AtomicRMWExpansionKind::None; } // This has so far only been implemented for MachO. Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -986,7 +986,8 @@ bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -19607,14 +19607,17 @@ return needsCmpXchgNb(PTy->getElementType()); } -bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32; const Type *MemType = AI->getType(); // If the operand is too big, we must see if cmpxchg8/16b is available // and default to library calls otherwise. - if (MemType->getPrimitiveSizeInBits() > NativeWidth) - return needsCmpXchgNb(MemType); + if (MemType->getPrimitiveSizeInBits() > NativeWidth) { + return needsCmpXchgNb(MemType) ? + AtomicRMWExpansionKind::CmpXChg : AtomicRMWExpansionKind::None; + } AtomicRMWInst::BinOp Op = AI->getOperation(); switch (Op) { @@ -19624,13 +19627,14 @@ case AtomicRMWInst::Add: case AtomicRMWInst::Sub: // It's better to use xadd, xsub or xchg for these in all cases. - return false; + return AtomicRMWExpansionKind::None; case AtomicRMWInst::Or: case AtomicRMWInst::And: case AtomicRMWInst::Xor: // If the atomicrmw's result isn't actually used, we can just add a "lock" // prefix to a normal instruction for these operations. - return !AI->use_empty(); + return !AI->use_empty() ? + AtomicRMWExpansionKind::CmpXChg : AtomicRMWExpansionKind::None; case AtomicRMWInst::Nand: case AtomicRMWInst::Max: case AtomicRMWInst::Min: @@ -19638,7 +19642,7 @@ case AtomicRMWInst::UMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. - return true; + return AtomicRMWExpansionKind::CmpXChg; } }