Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -123,6 +123,18 @@
                     // mask (ex: x86 blends).
   };
 
+  /// Enum that specifies what an AtomicRMWInst is expanded to, if at all.
+  /// Exists because different targets have different levels of support for
+  /// these atomic RMW instructions, and also have different options w.r.t.
+  /// what they should expand to.
+  enum class AtomicRMWExpansionKind {
+    None,    // Don't expand the instruction.
+    LLSC,    // Expand the instruction into load-linked/store-conditional; used
+             // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional`
+             // returns true.
+    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+  };
+
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
     switch (Content) {
     case UndefinedBooleanContent:
@@ -1064,10 +1076,11 @@
   /// (through emitLoadLinked()).
   virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
 
-  /// Returns true if the given AtomicRMW should be expanded by the
-  /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
-  virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
-    return false;
+  /// Returns how the IR-level AtomicExpand pass should expand the given
+  /// AtomicRMW, if at all. Default is to never expand.
+  virtual AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
+    return AtomicRMWExpansionKind::None;
   }
 
   /// On some platforms, an AtomicRMW that never actually modifies the value
Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
@@ -48,7 +48,7 @@
     bool expandAtomicLoadToLL(LoadInst *LI);
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
-    bool expandAtomicRMW(AtomicRMWInst *AI);
+    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
     bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -135,9 +135,12 @@
       // - into a load if it is idempotent
       // - into a Cmpxchg/LL-SC loop otherwise
       // we try them in that order.
-      MadeChange |=
-          (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
-          (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+
+      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+        MadeChange = true;
+      } else {
+        MadeChange |= tryExpandAtomicRMW(RMWI);
+      }
     } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
@@ -211,7 +214,7 @@
   // atomic if implemented as a native store. So we replace them by an
   // atomic swap, that can be implemented for example as a ldrex/strex on ARM
   // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
-  // It is the responsibility of the target to only return true in
+  // It is the responsibility of the target to only signal expansion via
   // shouldExpandAtomicRMW in cases where this is required and possible.
   IRBuilder<> Builder(SI);
   AtomicRMWInst *AI =
@@ -220,14 +223,26 @@
   SI->eraseFromParent();
 
   // Now we have an appropriate swap instruction, lower it as usual.
-  return expandAtomicRMW(AI);
+  return tryExpandAtomicRMW(AI);
 }
 
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+  case TargetLoweringBase::AtomicRMWExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+    assert(TLI->hasLoadLinkedStoreConditional() &&
+           "TargetLowering requested we expand AtomicRMW instruction into "
+           "load-linked/store-conditional combos, but such instructions aren't "
+           "supported");
     return expandAtomicRMWToLLSC(AI);
-  else
+  }
+  case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
     return expandAtomicRMWToCmpXchg(AI);
+  }
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
 }
 
 /// Emit IR to implement the given atomicrmw operation on values in registers,
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
@@ -335,7 +335,8 @@
 
   bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  TargetLoweringBase::AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8755,9 +8755,11 @@
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128;
+  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+                     : AtomicRMWExpansionKind::None;
 }
 
 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
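
For an out-of-tree target whose override still returns bool, the update is mechanical: instead of answering "expand or not?", the hook now names the expansion strategy. A minimal sketch, assuming a hypothetical MyTargetLowering with a 64-bit LL/SC size limit (neither the class nor the cut-off is part of this patch); it mirrors the AArch64/ARM pattern:

  // Hypothetical out-of-tree target: previously "return Size <= 64;" meant
  // "expand to an LL/SC loop"; now the strategy is stated explicitly.
  TargetLoweringBase::AtomicRMWExpansionKind
  MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    unsigned Size = AI->getType()->getPrimitiveSizeInBits();
    return Size <= 64 ? AtomicRMWExpansionKind::LLSC
                      : AtomicRMWExpansionKind::None;
  }

Returning LLSC is only valid when hasLoadLinkedStoreConditional() also returns true; tryExpandAtomicRMW above asserts exactly that.
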
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -404,7 +404,8 @@
 
   bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  TargetLoweringBase::AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -11199,9 +11199,12 @@
 
 // For the real atomic operations, we have ldrex/strex up to 32 bits,
 // and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= (Subtarget->isMClass() ? 32U : 64U);
+  return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+             ? AtomicRMWExpansionKind::LLSC
+             : AtomicRMWExpansionKind::None;
 }
 
 // This has so far only been implemented for MachO.
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -991,7 +991,8 @@
 
     bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -16398,14 +16398,17 @@
   return needsCmpXchgNb(PTy->getElementType());
 }
 
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
   const Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
   // and default to library calls otherwise.
-  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
-    return needsCmpXchgNb(MemType);
+  if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+    return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
+                                   : AtomicRMWExpansionKind::None;
+  }
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
   switch (Op) {
@@ -16415,13 +16418,14 @@
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
     // It's better to use xadd, xsub or xchg for these in all cases.
-    return false;
+    return AtomicRMWExpansionKind::None;
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
     // If the atomicrmw's result isn't actually used, we can just add a "lock"
     // prefix to a normal instruction for these operations.
-    return !AI->use_empty();
+    return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
+                            : AtomicRMWExpansionKind::None;
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
   case AtomicRMWInst::UMin:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
-    return true;
+    return AtomicRMWExpansionKind::CmpXChg;
   }
 }
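
For reference, the CmpXChg expansion requested above for Nand/Max/Min/UMax/UMin (and for Or/And/Xor when the result is used) amounts to the familiar compare-exchange retry loop. The C++ below is a source-level sketch of what that loop computes for a 32-bit fetch-nand; it is illustrative only, since the pass itself emits LLVM IR through expandAtomicRMWToCmpXchg rather than C++:

  #include <atomic>
  #include <cstdint>

  // Source-level equivalent of expanding "atomicrmw nand" via cmpxchg:
  // load the old value, compute the new one, and retry the compare-exchange
  // until no other thread has updated the location in between.
  uint32_t atomic_nand(std::atomic<uint32_t> &Mem, uint32_t Operand) {
    uint32_t Old = Mem.load(std::memory_order_relaxed);
    uint32_t New;
    do {
      New = ~(Old & Operand);          // the RMW operation
    } while (!Mem.compare_exchange_weak(Old, New, std::memory_order_seq_cst,
                                        std::memory_order_relaxed));
    return Old;                        // atomicrmw yields the previous value
  }

On failure, compare_exchange_weak refreshes Old with the value it found, so the loop recomputes New from up-to-date data before retrying.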