Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -123,6 +123,13 @@
                           // mask (ex: x86 blends).
   };
 
+  /// Enum that specifies what an AtomicRMWInst is expanded to, if at all.
+  enum class AtomicRMWExpansionKind {
+    Native,  // Don't expand the instruction.
+    LLSC,    // Expand the instruction into load-linked/store-conditional.
+    CmpXChg, // Expand the instruction into cmpxchg.
+  };
+
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
     switch (Content) {
     case UndefinedBooleanContent:
@@ -256,7 +263,7 @@
   virtual bool isCheapToSpeculateCttz() const {
     return false;
   }
-  
+
   /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
   virtual bool isCheapToSpeculateCtlz() const {
     return false;
@@ -1070,10 +1077,11 @@
   /// (through emitLoadLinked()).
   virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
 
-  /// Returns true if the given AtomicRMW should be expanded by the
-  /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
-  virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
-    return false;
+  /// Returns how the IR-level AtomicExpand pass should expand the given
+  /// AtomicRMW, if at all. Default is to never expand.
+  virtual AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
+    return AtomicRMWExpansionKind::Native;
   }
 
   /// On some platforms, an AtomicRMW that never actually modifies the value
@@ -2730,7 +2738,7 @@
 
   /// Hooks for building estimates in place of slower divisions and square
   /// roots.
-  
+
   /// Return a reciprocal square root estimate value for the input operand.
   /// The RefinementSteps output is the number of Newton-Raphson refinement
   /// iterations required to generate a sufficient (though not necessarily
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -135,9 +135,12 @@
       //  - into a load if it is idempotent
      //  - into a Cmpxchg/LL-SC loop otherwise
      //  we try them in that order.
-      MadeChange |=
-          (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
-          (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+
+      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+        MadeChange = true;
+        continue;
+      }
+      MadeChange |= expandAtomicRMW(RMWI);
     } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
@@ -224,10 +227,21 @@
 }
 
 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
-  if (TLI->hasLoadLinkedStoreConditional())
-    return expandAtomicRMWToLLSC(AI);
-  else
-    return expandAtomicRMWToCmpXchg(AI);
+  const auto How = TLI->shouldExpandAtomicRMWInIR(AI);
+  switch (How) {
+  case TargetLoweringBase::AtomicRMWExpansionKind::Native: return false;
+  case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+    assert(TLI->hasLoadLinkedStoreConditional() &&
+           "TargetLowering requested we expand AtomicRMW instruction into "
+           "load-linked/store-conditional combos, but such instructions "
+           "aren't supported");
+    return expandAtomicRMWToLLSC(AI);
+  }
+  case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
+    return expandAtomicRMWToCmpXchg(AI);
+  }
+  }
+  llvm_unreachable("Unhandled case in expandAtomicRMW");
 }
 
 /// Emit IR to implement the given atomicrmw operation on values in registers,
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -336,7 +336,8 @@
 
   bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  TargetLoweringBase::AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2055,7 +2055,7 @@
       continue;
     }
-    
+
     if (VA.isRegLoc()) {
       // Arguments stored in registers.
       EVT RegVT = VA.getLocVT();
@@ -4744,7 +4744,7 @@
 
   // The index of an EXT is the first element if it is not UNDEF.
   // Watch out for the beginning UNDEFs. The EXT index should be the expected
-  // value of the first element. E.g. 
+  // value of the first element. E.g.
   // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
   // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
   // ExpectedElt is the last mask index plus 1.
@@ -8772,9 +8772,14 @@
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128;
+  if (Size <= 128) {
+    return TargetLoweringBase::AtomicRMWExpansionKind::LLSC;
+  } else {
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 }
 
 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -407,7 +407,8 @@
 
     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
     bool useLoadStackGuardNode() const override;
 
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -7953,7 +7953,7 @@
   // Get widened type and narrowed type.
   MVT widenType;
   unsigned numElem = VT.getVectorNumElements();
-  
+
   EVT inputLaneType = Vec.getValueType().getVectorElementType();
   switch (inputLaneType.getSimpleVT().SimpleTy) {
     case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
@@ -11100,9 +11100,14 @@
 
 // For the real atomic operations, we have ldrex/strex up to 32 bits,
 // and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= (Subtarget->isMClass() ? 32U : 64U);
+  if (Size <= (Subtarget->isMClass() ? 32U : 64U)) {
+    return TargetLoweringBase::AtomicRMWExpansionKind::LLSC;
+  } else {
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 }
 
 // This has so far only been implemented for MachO.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -388,7 +388,7 @@
       FMSUB_RND,
       FNMSUB_RND,
       FMADDSUB_RND,
-      FMSUBADD_RND, 
+      FMSUBADD_RND,
 
       // Compress and expand
       COMPRESS,
@@ -986,7 +986,8 @@
 
     bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19607,14 +19607,19 @@
   return needsCmpXchgNb(PTy->getElementType());
 }
 
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
   const Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
   // and default to library calls otherwise.
-  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
-    return needsCmpXchgNb(MemType);
+  if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+    if (needsCmpXchgNb(MemType))
+      return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
+    else
+      return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
   switch (Op) {
@@ -19624,13 +19629,16 @@
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
     // It's better to use xadd, xsub or xchg for these in all cases.
-    return false;
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
     // If the atomicrmw's result isn't actually used, we can just add a "lock"
     // prefix to a normal instruction for these operations.
-    return !AI->use_empty();
+    if (!AI->use_empty())
+      return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
+    else
+      return TargetLoweringBase::AtomicRMWExpansionKind::Native;
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -19638,7 +19646,7 @@
   case AtomicRMWInst::UMin:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
-    return true;
+    return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
   }
 }
 
@@ -24907,7 +24915,7 @@
   // an and with a mask.
   // We'd like to try to combine that into a shuffle with zero
   // plus a bitcast, removing the and.
-  if (N0.getOpcode() != ISD::BITCAST || 
+  if (N0.getOpcode() != ISD::BITCAST ||
       N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
     return SDValue();
 
@@ -24937,7 +24945,7 @@
   unsigned ResSize = N1.getValueType().getScalarSizeInBits();
 
   // Make sure the splat matches the mask we expect
-  if (SplatBitSize > ResSize || 
+  if (SplatBitSize > ResSize ||
       (SplatValue + 1).exactLogBase2() != (int)SrcSize)
     return SDValue();
 
@@ -25897,7 +25905,7 @@
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
-  
+
   return SDValue();
 }
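
Note for out-of-tree targets (illustrative, not part of the patch): the new hook is a drop-in replacement for the old boolean return. A minimal sketch, assuming a hypothetical MyTargetLowering that handles atomicrmw natively up to 64 bits and wants a cmpxchg loop for anything wider:

TargetLoweringBase::AtomicRMWExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // MyTargetLowering and the 64-bit cutoff are made up for illustration.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size <= 64)
    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
  // Wider operations are expanded by AtomicExpandPass into a cmpxchg loop.
  return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
}

Returning AtomicRMWExpansionKind::LLSC is only valid when hasLoadLinkedStoreConditional() returns true; AtomicExpand::expandAtomicRMW() now asserts this.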