Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -123,6 +123,13 @@
                           // mask (ex: x86 blends).
   };
 
+  /// Enum that specifies what an AtomicRMWInst is expanded to, if at all.
+  enum class AtomicRMWExpansionKind {
+    Native,  // Don't expand the instruction.
+    LLSC,    // Expand the instruction into load-linked/store-conditional.
+    CmpXChg, // Expand the instruction into cmpxchg.
+  };
+
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
     switch (Content) {
     case UndefinedBooleanContent:
@@ -256,7 +263,7 @@
   virtual bool isCheapToSpeculateCttz() const {
     return false;
   }
-  
+
   /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz.
   virtual bool isCheapToSpeculateCtlz() const {
     return false;
@@ -1070,10 +1077,11 @@
   /// (through emitLoadLinked()).
   virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
 
-  /// Returns true if the given AtomicRMW should be expanded by the
-  /// IR-level AtomicExpand pass into a loop using LoadLinked/StoreConditional.
-  virtual bool shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
-    return false;
+  /// Returns how the IR-level AtomicExpand pass should expand the given
+  /// AtomicRMW, if at all. Default is to never expand.
+  virtual AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *RMWI) const {
+    return AtomicRMWExpansionKind::Native;
   }
 
   /// On some platforms, an AtomicRMW that never actually modifies the value
@@ -2730,7 +2738,7 @@
 
   /// Hooks for building estimates in place of slower divisions and square
   /// roots.
-  
+
   /// Return a reciprocal square root estimate value for the input operand.
   /// The RefinementSteps output is the number of Newton-Raphson refinement
   /// iterations required to generate a sufficient (though not necessarily
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -135,9 +135,12 @@
       //  - into a load if it is idempotent
      //  - into a Cmpxchg/LL-SC loop otherwise
      //  we try them in that order.
-      MadeChange |=
-          (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
-          (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+
+      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+        MadeChange = true;
+        continue;
+      }
+      MadeChange |= expandAtomicRMW(RMWI);
     } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
@@ -224,10 +227,21 @@
 }
 
 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
-  if (TLI->hasLoadLinkedStoreConditional())
-    return expandAtomicRMWToLLSC(AI);
-  else
-    return expandAtomicRMWToCmpXchg(AI);
+  const auto How = TLI->shouldExpandAtomicRMWInIR(AI);
+  switch (How) {
+  case TargetLoweringBase::AtomicRMWExpansionKind::Native: return false;
+  case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+    assert(TLI->hasLoadLinkedStoreConditional() &&
+           "TargetLowering requested we expand AtomicRMW instruction into "
+           "load-linked/store-conditional combos, but such instructions "
+           "aren't supported");
+    return expandAtomicRMWToLLSC(AI);
+  }
+  case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
+    return expandAtomicRMWToCmpXchg(AI);
+  }
+  }
+  llvm_unreachable("Unhandled case in expandAtomicRMW");
 }
 
 /// Emit IR to implement the given atomicrmw operation on values in registers,
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -336,7 +336,8 @@
 
   bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  TargetLoweringBase::AtomicRMWExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2055,7 +2055,7 @@
       continue;
     }
-    
+
     if (VA.isRegLoc()) {
       // Arguments stored in registers.
       EVT RegVT = VA.getLocVT();
@@ -4744,7 +4744,7 @@
 
   // The index of an EXT is the first element if it is not UNDEF.
   // Watch out for the beginning UNDEFs. The EXT index should be the expected
-  // value of the first element. E.g. 
+  // value of the first element. E.g.
   // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
   // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
   // ExpectedElt is the last mask index plus 1.
@@ -8772,9 +8772,14 @@
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128;
+  if (Size <= 128) {
+    return TargetLoweringBase::AtomicRMWExpansionKind::LLSC;
+  } else {
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 }
 
 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -407,7 +407,8 @@
 
     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
     bool useLoadStackGuardNode() const override;
 
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -7953,7 +7953,7 @@
   // Get widened type and narrowed type.
   MVT widenType;
   unsigned numElem = VT.getVectorNumElements();
-  
+
   EVT inputLaneType = Vec.getValueType().getVectorElementType();
   switch (inputLaneType.getSimpleVT().SimpleTy) {
     case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
@@ -11100,9 +11100,14 @@
 
 // For the real atomic operations, we have ldrex/strex up to 32 bits,
 // and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= (Subtarget->isMClass() ? 32U : 64U);
+  if (Size <= (Subtarget->isMClass() ? 32U : 64U)) {
+    return TargetLoweringBase::AtomicRMWExpansionKind::LLSC;
+  } else {
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 }
 
 // This has so far only been implemented for MachO.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -388,7 +388,7 @@
       FMSUB_RND,
       FNMSUB_RND,
       FMADDSUB_RND,
-      FMSUBADD_RND, 
+      FMSUBADD_RND,
 
       // Compress and expand
       COMPRESS,
@@ -986,7 +986,8 @@
 
     bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19607,14 +19607,19 @@
   return needsCmpXchgNb(PTy->getElementType());
 }
 
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
   const Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
   // and default to library calls otherwise.
-  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
-    return needsCmpXchgNb(MemType);
+  if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+    if (needsCmpXchgNb(MemType))
+      return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
+    else
+      return TargetLoweringBase::AtomicRMWExpansionKind::Native;
+  }
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
   switch (Op) {
@@ -19624,13 +19629,16 @@
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
     // It's better to use xadd, xsub or xchg for these in all cases.
-    return false;
+    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
     // If the atomicrmw's result isn't actually used, we can just add a "lock"
     // prefix to a normal instruction for these operations.
-    return !AI->use_empty();
+    if (!AI->use_empty())
+      return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
+    else
+      return TargetLoweringBase::AtomicRMWExpansionKind::Native;
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -19638,7 +19646,7 @@
   case AtomicRMWInst::UMin:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
-    return true;
+    return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
   }
 }
 
@@ -24907,7 +24915,7 @@
   // an and with a mask.
   // We'd like to try to combine that into a shuffle with zero
   // plus a bitcast, removing the and.
-  if (N0.getOpcode() != ISD::BITCAST || 
+  if (N0.getOpcode() != ISD::BITCAST ||
       N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
     return SDValue();
 
@@ -24937,7 +24945,7 @@
   unsigned ResSize = N1.getValueType().getScalarSizeInBits();
 
   // Make sure the splat matches the mask we expect
-  if (SplatBitSize > ResSize || 
+  if (SplatBitSize > ResSize ||
       (SplatValue + 1).exactLogBase2() != (int)SrcSize)
     return SDValue();
 
@@ -25897,7 +25905,7 @@
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
-  
+
   return SDValue();
 }
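
Note for out-of-tree targets (illustrative, not part of the patch): the new hook is a drop-in replacement for the old boolean return. A minimal sketch, assuming a hypothetical MyTargetLowering that handles atomicrmw natively up to 64 bits and wants a cmpxchg loop for anything wider:

TargetLoweringBase::AtomicRMWExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // MyTargetLowering and the 64-bit cutoff are made up for illustration.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size <= 64)
    return TargetLoweringBase::AtomicRMWExpansionKind::Native;
  // Wider operations are expanded by AtomicExpandPass into a cmpxchg loop.
  return TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg;
}

Returning AtomicRMWExpansionKind::LLSC is only valid when hasLoadLinkedStoreConditional() returns true; AtomicExpand::expandAtomicRMW() now asserts this.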