diff --git a/llvm/docs/Atomics.rst b/llvm/docs/Atomics.rst
--- a/llvm/docs/Atomics.rst
+++ b/llvm/docs/Atomics.rst
@@ -453,10 +453,10 @@
 atomic constructs. Here are some lowerings it can do:
 
 * cmpxchg -> loop with load-linked/store-conditional
-  by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``,
-  ``emitStoreConditional()``
-* large loads/stores -> ll-sc/cmpxchg
-  by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()``
+  by overriding ``shouldExpandAtomicInstInIR()`` for ``AtomicCmpXchgInst``,
+  ``emitLoadLinked()``, and ``emitStoreConditional()``
+* large loads/stores -> ll-sc/cmpxchg by overriding
+  ``shouldExpandAtomicInstInIR()`` for ``LoadInst``/``StoreInst``
 * strong atomic accesses -> monotonic accesses + fences by overriding
   ``shouldInsertFencesForAtomic()``, ``emitLeadingFence()``, and
   ``emitTrailingFence()``
@@ -464,8 +464,8 @@
   by overriding ``expandAtomicRMWInIR()``
 * expansion to __atomic_* libcalls for unsupported sizes.
 * part-word atomicrmw/cmpxchg -> target-specific intrinsic by overriding
-  ``shouldExpandAtomicRMWInIR``, ``emitMaskedAtomicRMWIntrinsic``,
-  ``shouldExpandAtomicCmpXchgInIR``, and ``emitMaskedAtomicCmpXchgIntrinsic``.
+  ``shouldExpandAtomicInstInIR`` for ``AtomicRMWInst``/``AtomicCmpXchgInst``,
+  ``emitMaskedAtomicRMWIntrinsic``, and ``emitMaskedAtomicCmpXchgIntrinsic``.
 
 For an example of these look at the ARM (first five lowerings) or RISC-V (last
 lowering) backend.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2041,9 +2041,14 @@
     return true;
   }
 
-  /// Returns how the given (atomic) load should be expanded by the
-  /// IR-level AtomicExpand pass.
-  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  /// Returns how the given (atomic) instruction should be expanded by the
+  /// IR-level AtomicExpand pass. This is a combination of the previous four
+  /// per-instruction API functions.
+  virtual AtomicExpansionKind shouldExpandAtomicInstInIR(Instruction *I) const {
+    if (auto *RMW = dyn_cast<AtomicRMWInst>(I))
+      return RMW->isFloatingPointOperation() ? AtomicExpansionKind::CmpXChg
+                                             : AtomicExpansionKind::None;
+
     return AtomicExpansionKind::None;
   }
 
@@ -2055,13 +2060,6 @@
     return AtomicExpansionKind::None;
   }
 
-  /// Returns how the given (atomic) store should be expanded by the IR-level
-  /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
-  /// to use an atomicrmw xchg.
-  virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
-    return AtomicExpansionKind::None;
-  }
-
   /// Returns how the given (atomic) store should be cast by the IR-level
   /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger
   /// will try to cast the operands to integer values.
@@ -2071,20 +2069,6 @@
     return AtomicExpansionKind::None;
   }
 
-  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
-  /// AtomicExpand pass.
-  virtual AtomicExpansionKind
-  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
-    return AtomicExpansionKind::None;
-  }
-
-  /// Returns how the IR-level AtomicExpand pass should expand the given
-  /// AtomicRMW, if at all. Default is to never expand.
-  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
-    return RMW->isFloatingPointOperation() ?
- AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; - } - /// Returns how the given atomic atomicrmw should be cast by the IR-level /// AtomicExpand pass. virtual AtomicExpansionKind @@ -2105,7 +2089,7 @@ /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf /// This method tries doing that transformation, returning the atomic load if /// it succeeds, and nullptr otherwise. - /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo + /// If shouldExpandAtomicInstInIR returns true on that load, it will undergo /// another round of expansion. virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -234,7 +234,7 @@ FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); } else if (CASI && - TLI->shouldExpandAtomicCmpXchgInIR(CASI) == + TLI->shouldExpandAtomicInstInIR(CASI) == TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()) || @@ -402,7 +402,7 @@ } bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { - switch (TLI->shouldExpandAtomicLoadInIR(LI)) { + switch (TLI->shouldExpandAtomicInstInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: @@ -424,7 +424,7 @@ } bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { - switch (TLI->shouldExpandAtomicStoreInIR(SI)) { + switch (TLI->shouldExpandAtomicInstInIR(SI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::Expand: @@ -509,7 +509,7 @@ // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. // It is the responsibility of the target to only signal expansion via - // shouldExpandAtomicRMW in cases where this is required and possible. + // shouldExpandAtomicInstInIR in cases where this is required and possible. 
IRBuilder<> Builder(SI); AtomicRMWInst *AI = Builder.CreateAtomicRMW( AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), @@ -548,7 +548,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { LLVMContext &Ctx = AI->getModule()->getContext(); - TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); + TargetLowering::AtomicExpansionKind Kind = + TLI->shouldExpandAtomicInstInIR(AI); switch (Kind) { case TargetLoweringBase::AtomicExpansionKind::None: return false; @@ -1493,7 +1494,7 @@ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(CI); - switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { + switch (TLI->shouldExpandAtomicInstInIR(CI)) { default: llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); case TargetLoweringBase::AtomicExpansionKind::None: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -676,14 +676,7 @@ bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; bool useLoadStackGuardNode() const override; TargetLoweringBase::LegalizeTypeAction diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19697,100 +19697,102 @@ return isOpSuitableForLDPSTP(I); } -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. TargetLoweringBase::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - if (Size != 128 || isOpSuitableForLDPSTP(SI)) - return AtomicExpansionKind::None; - return AtomicExpansionKind::Expand; -} - -// Loads and stores less than 128-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - - if (Size != 128 || isOpSuitableForLDPSTP(LI)) - return AtomicExpansionKind::None; - - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. 
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - - return AtomicExpansionKind::LLSC; -} - -// For the real atomic operations, we have ldxr/stxr up to 128 bits, -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; - - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size > 128) return AtomicExpansionKind::None; - - // Nand is not supported in LSE. - // Leave 128 bits to LLSC or CmpXChg. - if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { - if (Subtarget->hasLSE()) +AArch64TargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Loads and stores less than 128-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + if (Size != 128 || isOpSuitableForLDPSTP(SI)) return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics()) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. - // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { - return AtomicExpansionKind::None; - } - } + return AtomicExpansionKind::Expand; } - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; + if (auto *LI = dyn_cast(I)) { + // Loads and stores less than 128-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return AtomicExpansionKind::LLSC; -} + if (Size != 128 || isOpSuitableForLDPSTP(LI)) + return AtomicExpansionKind::None; -TargetLowering::AtomicExpansionKind -AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *AI) const { - // If subtarget has LSE, leave cmpxchg intact for codegen. - if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) - return AtomicExpansionKind::None; - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement cmpxchg without spilling. If the address being exchanged is also - // on the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::None; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. 
If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; - // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand - // it. - unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); - if (Size > 64) - return AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; + } - return AtomicExpansionKind::LLSC; + if (auto *AI = dyn_cast(I)) { + // For the real atomic operations, we have ldxr/stxr up to 128 bits, + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size > 128) + return AtomicExpansionKind::None; + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) + return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } + } + } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + + return AtomicExpansionKind::LLSC; + } + + if (auto *AI = dyn_cast(I)) { + // If subtarget has LSE, leave cmpxchg intact for codegen. + if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) + return AtomicExpansionKind::None; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement cmpxchg without spilling. If the address being exchanged is + // also on the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::None; + + // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand + // it. 
+ unsigned Size = + AI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size > 64) + return AtomicExpansionKind::None; + + return AtomicExpansionKind::LLSC; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -334,7 +334,8 @@ return MVT::i32; } - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1, LLT Ty2) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4793,16 +4793,20 @@ } } -TargetLowering::AtomicExpansionKind -AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { - switch (RMW->getOperation()) { - case AtomicRMWInst::Nand: - case AtomicRMWInst::FAdd: - case AtomicRMWInst::FSub: - return AtomicExpansionKind::CmpXChg; - default: - return AtomicExpansionKind::None; +TargetLoweringBase::AtomicExpansionKind +AMDGPUTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *RMW = dyn_cast(I)) { + switch (RMW->getOperation()) { + case AtomicRMWInst::Nand: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + return AtomicExpansionKind::CmpXChg; + default: + return AtomicExpansionKind::None; + } } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal( diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -473,11 +473,9 @@ const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const override; - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12545,120 +12545,120 @@ return DenormMode == DenormalMode::getIEEE(); } -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { - unsigned AS = RMW->getPointerAddressSpace(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS) - return AtomicExpansionKind::NotAtomic; +TargetLoweringBase::AtomicExpansionKind +SITargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) + return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? 
AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; - auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) { - OptimizationRemarkEmitter ORE(RMW->getFunction()); - LLVMContext &Ctx = RMW->getFunction()->getContext(); - SmallVector SSNs; - Ctx.getSyncScopeNames(SSNs); - auto MemScope = SSNs[RMW->getSyncScopeID()].empty() - ? "system" - : SSNs[RMW->getSyncScopeID()]; - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) - << "Hardware instruction generated for atomic " - << RMW->getOperationName(RMW->getOperation()) - << " operation at memory scope " << MemScope - << " due to an unsafe request."; - }); - return Kind; - }; + if (auto *LI = dyn_cast(I)) + return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; - switch (RMW->getOperation()) { - case AtomicRMWInst::FAdd: { - Type *Ty = RMW->getType(); + if (auto *RMW = dyn_cast(I)) { + unsigned AS = RMW->getPointerAddressSpace(); + if (AS == AMDGPUAS::PRIVATE_ADDRESS) + return AtomicExpansionKind::NotAtomic; - // We don't have a way to support 16-bit atomics now, so just leave them - // as-is. - if (Ty->isHalfTy()) - return AtomicExpansionKind::None; + auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) { + OptimizationRemarkEmitter ORE(RMW->getFunction()); + LLVMContext &Ctx = RMW->getFunction()->getContext(); + SmallVector SSNs; + Ctx.getSyncScopeNames(SSNs); + auto MemScope = SSNs[RMW->getSyncScopeID()].empty() + ? "system" + : SSNs[RMW->getSyncScopeID()]; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) + << "Hardware instruction generated for atomic " + << RMW->getOperationName(RMW->getOperation()) + << " operation at memory scope " << MemScope + << " due to an unsafe request."; + }); + return Kind; + }; - if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy())) - return AtomicExpansionKind::CmpXChg; + switch (RMW->getOperation()) { + case AtomicRMWInst::FAdd: { + Type *Ty = RMW->getType(); - if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) && - Subtarget->hasAtomicFaddInsts()) { - if (Subtarget->hasGFX940Insts()) + // We don't have a way to support 16-bit atomics now, so just leave them + // as-is. + if (Ty->isHalfTy()) return AtomicExpansionKind::None; - // The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe - // floating point atomic instructions. May generate more efficient code, - // but may not respect rounding and denormal modes, and may give incorrect - // results for certain memory destinations. - if (RMW->getFunction() - ->getFnAttribute("amdgpu-unsafe-fp-atomics") - .getValueAsString() != "true") + if (!Ty->isFloatTy() && + (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy())) return AtomicExpansionKind::CmpXChg; - if (Subtarget->hasGFX90AInsts()) { - if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS) + if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) && + Subtarget->hasAtomicFaddInsts()) { + if (Subtarget->hasGFX940Insts()) + return AtomicExpansionKind::None; + + // The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe + // floating point atomic instructions. May generate more efficient code, + // but may not respect rounding and denormal modes, and may give + // incorrect results for certain memory destinations. 
+ if (RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsString() != "true") return AtomicExpansionKind::CmpXChg; - auto SSID = RMW->getSyncScopeID(); - if (SSID == SyncScope::System || - SSID == RMW->getContext().getOrInsertSyncScopeID("one-as")) + if (Subtarget->hasGFX90AInsts()) { + if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS) + return AtomicExpansionKind::CmpXChg; + + auto SSID = RMW->getSyncScopeID(); + if (SSID == SyncScope::System || + SSID == RMW->getContext().getOrInsertSyncScopeID("one-as")) + return AtomicExpansionKind::CmpXChg; + + return ReportUnsafeHWInst(AtomicExpansionKind::None); + } + + if (AS == AMDGPUAS::FLAT_ADDRESS) return AtomicExpansionKind::CmpXChg; - return ReportUnsafeHWInst(AtomicExpansionKind::None); + return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None) + : AtomicExpansionKind::CmpXChg; } - if (AS == AMDGPUAS::FLAT_ADDRESS) - return AtomicExpansionKind::CmpXChg; + // DS FP atomics do respect the denormal mode, but the rounding mode is + // fixed to round-to-nearest-even. + // The only exception is DS_ADD_F64 which never flushes regardless of + // mode. + if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) { + if (!Ty->isDoubleTy()) + return AtomicExpansionKind::None; - return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None) - : AtomicExpansionKind::CmpXChg; + if (fpModeMatchesGlobalFPAtomicMode(RMW)) + return AtomicExpansionKind::None; + + return RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsString() == "true" + ? ReportUnsafeHWInst(AtomicExpansionKind::None) + : AtomicExpansionKind::CmpXChg; + } + + return AtomicExpansionKind::CmpXChg; + } + default: + break; } - // DS FP atomics do respect the denormal mode, but the rounding mode is - // fixed to round-to-nearest-even. - // The only exception is DS_ADD_F64 which never flushes regardless of mode. - if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) { - if (!Ty->isDoubleTy()) - return AtomicExpansionKind::None; - - if (fpModeMatchesGlobalFPAtomicMode(RMW)) - return AtomicExpansionKind::None; - - return RMW->getFunction() - ->getFnAttribute("amdgpu-unsafe-fp-atomics") - .getValueAsString() == "true" - ? ReportUnsafeHWInst(AtomicExpansionKind::None) - : AtomicExpansionKind::CmpXChg; - } - - return AtomicExpansionKind::CmpXChg; - } - default: - break; + return AMDGPUTargetLowering::shouldExpandAtomicInstInIR(RMW); } - return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); -} + if (auto *CmpX = dyn_cast(I)) + return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS + ? AtomicExpansionKind::NotAtomic + : AtomicExpansionKind::None; -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; -} - -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; -} - -TargetLowering::AtomicExpansionKind -SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const { - return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS - ? 
AtomicExpansionKind::NotAtomic - : AtomicExpansionKind::None; + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } const TargetRegisterClass * diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -663,14 +663,9 @@ unsigned Factor) const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override; + TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; bool useLoadStackGuardNode() const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20943,96 +20943,97 @@ llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } -// Loads and stores less than 64-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit -// anything for those. TargetLoweringBase::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - bool has64BitAtomicStore; - if (Subtarget->isMClass()) - has64BitAtomicStore = false; - else if (Subtarget->isThumb()) - has64BitAtomicStore = Subtarget->hasV7Ops(); - else - has64BitAtomicStore = Subtarget->hasV6Ops(); +ARMTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't + // emit anything for those. + bool has64BitAtomicStore; + if (Subtarget->isMClass()) + has64BitAtomicStore = false; + else if (Subtarget->isThumb()) + has64BitAtomicStore = Subtarget->hasV7Ops(); + else + has64BitAtomicStore = Subtarget->hasV6Ops(); - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} - -// Loads and stores less than 64-bits are already atomic; ones above that -// are doomed anyway, so defer to the default libcall and blame the OS when -// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit -// anything for those. -// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that -// guarantee, see DDI0406C ARM architecture reference manual, -// sections A8.8.72-74 LDRD) -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - bool has64BitAtomicLoad; - if (Subtarget->isMClass()) - has64BitAtomicLoad = false; - else if (Subtarget->isThumb()) - has64BitAtomicLoad = Subtarget->hasV7Ops(); - else - has64BitAtomicLoad = Subtarget->hasV6Ops(); - - unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return (Size == 64 && has64BitAtomicLoad) ? 
AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; -} - -// For the real atomic operations, we have ldrex/strex up to 32 bits, -// and up to 64 bits on the non-M profiles -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; - - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - bool hasAtomicRMW; - if (Subtarget->isMClass()) - hasAtomicRMW = Subtarget->hasV8MBaselineOps(); - else if (Subtarget->isThumb()) - hasAtomicRMW = Subtarget->hasV7Ops(); - else - hasAtomicRMW = Subtarget->hasV6Ops(); - if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on - // the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - return AtomicExpansionKind::LLSC; + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } - return AtomicExpansionKind::None; -} -// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 -// bits, and up to 64 bits on the non-M profiles. -TargetLowering::AtomicExpansionKind -ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement cmpxchg without spilling. If the address being exchanged is also - // on the stack and close enough to the spill slot, this can lead to a - // situation where the monitor always gets cleared and the atomic operation - // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); - bool HasAtomicCmpXchg; - if (Subtarget->isMClass()) - HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); - else if (Subtarget->isThumb()) - HasAtomicCmpXchg = Subtarget->hasV7Ops(); - else - HasAtomicCmpXchg = Subtarget->hasV6Ops(); - if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && - Size <= (Subtarget->isMClass() ? 32U : 64U)) - return AtomicExpansionKind::LLSC; - return AtomicExpansionKind::None; + if (auto *LI = dyn_cast(I)) { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't + // emit anything for those. + // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that + // guarantee, see DDI0406C ARM architecture reference manual, + // sections A8.8.72-74 LDRD) + bool has64BitAtomicLoad; + if (Subtarget->isMClass()) + has64BitAtomicLoad = false; + else if (Subtarget->isThumb()) + has64BitAtomicLoad = Subtarget->hasV7Ops(); + else + has64BitAtomicLoad = Subtarget->hasV6Ops(); + + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64 && has64BitAtomicLoad) ? 
AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + bool hasAtomicRMW; + if (Subtarget->isMClass()) + hasAtomicRMW = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + hasAtomicRMW = Subtarget->hasV7Ops(); + else + hasAtomicRMW = Subtarget->hasV6Ops(); + if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic + // operation can never succeed. So at -O0 lower this operation to a CAS + // loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; + } + return AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + // ldrex/strex can be used up to 32 bits, and up to 64 bits on the non-M + // profiles. At -O0, fast-regalloc cannot cope with the live vregs necessary + // to implement cmpxchg without spilling. If the address being exchanged is + // also on the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); + bool HasAtomicCmpXchg; + if (Subtarget->isMClass()) + HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + HasAtomicCmpXchg = Subtarget->hasV7Ops(); + else + HasAtomicCmpXchg = Subtarget->hasV6Ops(); + if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && + Size <= (Subtarget->isMClass() ? 
32U : 64U)) + return AtomicExpansionKind::LLSC; + return AtomicExpansionKind::None; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool ARMTargetLowering::shouldInsertFencesForAtomic( diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -327,15 +327,8 @@ AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { - return AtomicExpansionKind::LLSC; - } + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; private: void initializeHVXLowering(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3658,24 +3658,27 @@ return Ext; } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64 - ? AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; -} +TargetLoweringBase::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + // Do not expand loads and stores that don't exceed 64 bits. + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; + } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - // Do not expand loads and stores that don't exceed 64 bits. - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64 - ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} + if (auto *LI = dyn_cast(I)) { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? 
AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; + } -TargetLowering::AtomicExpansionKind -HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *AI) const { - return AtomicExpansionKind::LLSC; + if (auto *AI = dyn_cast(I)) + return AtomicExpansionKind::LLSC; + + if (auto *AI = dyn_cast(I)) + return AtomicExpansionKind::LLSC; + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -569,8 +569,8 @@ return AtomicExpansionKind::None; } - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; private: const NVPTXSubtarget &STI; // cache the subtarget here diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -5127,65 +5127,69 @@ } } -NVPTXTargetLowering::AtomicExpansionKind -NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - Type *Ty = AI->getValOperand()->getType(); +TargetLoweringBase::AtomicExpansionKind +NVPTXTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + Type *Ty = AI->getValOperand()->getType(); - if (AI->isFloatingPointOperation()) { - if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { - if (Ty->isFloatTy()) - return AtomicExpansionKind::None; - if (Ty->isDoubleTy() && STI.hasAtomAddF64()) - return AtomicExpansionKind::None; + if (AI->isFloatingPointOperation()) { + if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { + if (Ty->isFloatTy()) + return AtomicExpansionKind::None; + if (Ty->isDoubleTy() && STI.hasAtomAddF64()) + return AtomicExpansionKind::None; + } + return AtomicExpansionKind::CmpXChg; } + + assert(Ty->isIntegerTy() && "Ty should be integer at this point"); + auto ITy = cast(Ty); + + switch (AI->getOperation()) { + default: + return AtomicExpansionKind::CmpXChg; + case AtomicRMWInst::BinOp::And: + case AtomicRMWInst::BinOp::Or: + case AtomicRMWInst::BinOp::Xor: + case AtomicRMWInst::BinOp::Xchg: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomBitwise64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + case AtomicRMWInst::BinOp::Add: + case AtomicRMWInst::BinOp::Sub: + case AtomicRMWInst::BinOp::Max: + case AtomicRMWInst::BinOp::Min: + case AtomicRMWInst::BinOp::UMax: + case AtomicRMWInst::BinOp::UMin: + switch (ITy->getBitWidth()) { + case 8: + case 16: + return AtomicExpansionKind::CmpXChg; + case 32: + return AtomicExpansionKind::None; + case 64: + if (STI.hasAtomMinMax64()) + return AtomicExpansionKind::None; + return AtomicExpansionKind::CmpXChg; + default: + llvm_unreachable("unsupported width encountered"); + } + } + return AtomicExpansionKind::CmpXChg; } - assert(Ty->isIntegerTy() && "Ty should be integer at this point"); - auto ITy = cast(Ty); - - switch (AI->getOperation()) { - default: - return AtomicExpansionKind::CmpXChg; - case AtomicRMWInst::BinOp::And: - case AtomicRMWInst::BinOp::Or: - case AtomicRMWInst::BinOp::Xor: - case 
AtomicRMWInst::BinOp::Xchg: - switch (ITy->getBitWidth()) { - case 8: - case 16: - return AtomicExpansionKind::CmpXChg; - case 32: - return AtomicExpansionKind::None; - case 64: - if (STI.hasAtomBitwise64()) - return AtomicExpansionKind::None; - return AtomicExpansionKind::CmpXChg; - default: - llvm_unreachable("unsupported width encountered"); - } - case AtomicRMWInst::BinOp::Add: - case AtomicRMWInst::BinOp::Sub: - case AtomicRMWInst::BinOp::Max: - case AtomicRMWInst::BinOp::Min: - case AtomicRMWInst::BinOp::UMax: - case AtomicRMWInst::BinOp::UMin: - switch (ITy->getBitWidth()) { - case 8: - case 16: - return AtomicExpansionKind::CmpXChg; - case 32: - return AtomicExpansionKind::None; - case 64: - if (STI.hasAtomMinMax64()) - return AtomicExpansionKind::None; - return AtomicExpansionKind::CmpXChg; - default: - llvm_unreachable("unsupported width encountered"); - } - } - - return AtomicExpansionKind::CmpXChg; + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } // Pin NVPTXTargetObjectFile's vtables to this file. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -912,11 +912,8 @@ bool shouldInlineQuadwordAtomics() const; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; - - TargetLowering::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6238,7 +6238,7 @@ ArgOffset += PtrByteSize; continue; } - // Copy the object to parameter save area if it can not be entirely passed + // Copy the object to parameter save area if it can not be entirely passed // by registers. // FIXME: we only need to copy the parts which need to be passed in // parameter save area. 
For the parts passed by registers, we don't need @@ -6871,7 +6871,7 @@ // // Low Memory +--------------------------------------------+ // SP +---> | Back chain | ---+ -// | +--------------------------------------------+ | +// | +--------------------------------------------+ | // | | Saved Condition Register | | // | +--------------------------------------------+ | // | | Saved Linkage Register | | @@ -7836,7 +7836,7 @@ return SDValue(); SDValue N1 = Op.getOperand(0); - EVT SrcVT = N1.getValueType(); + EVT SrcVT = N1.getValueType(); unsigned SrcSize = SrcVT.getSizeInBits(); if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) || @@ -18080,20 +18080,23 @@ Subtarget.hasQuadwordAtomics(); } -TargetLowering::AtomicExpansionKind -PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (shouldInlineQuadwordAtomics() && Size == 128) - return AtomicExpansionKind::MaskedIntrinsic; - return TargetLowering::shouldExpandAtomicRMWInIR(AI); -} +TargetLoweringBase::AtomicExpansionKind +PPCTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (shouldInlineQuadwordAtomics() && Size == 128) + return AtomicExpansionKind::MaskedIntrinsic; + return TargetLowering::shouldExpandAtomicInstInIR(AI); + } -TargetLowering::AtomicExpansionKind -PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { - unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); - if (shouldInlineQuadwordAtomics() && Size == 128) - return AtomicExpansionKind::MaskedIntrinsic; - return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI); + if (auto *AI = dyn_cast(I)) { + unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); + if (shouldInlineQuadwordAtomics() && Size == 128) + return AtomicExpansionKind::MaskedIntrinsic; + return TargetLowering::shouldExpandAtomicInstInIR(AI); + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static Intrinsic::ID diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -509,14 +509,12 @@ bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override; - TargetLowering::AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11517,18 +11517,30 @@ return nullptr; } -TargetLowering::AtomicExpansionKind -RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating - // point operations can't be used in an lr/sc sequence without breaking the - // 
forward-progress guarantee. - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::CmpXChg; +TargetLoweringBase::AtomicExpansionKind +RISCVTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as + // floating point operations can't be used in an lr/sc sequence without + // breaking the forward-progress guarantee. + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size == 8 || Size == 16) - return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; + } + + if (auto *CI = dyn_cast(I)) { + unsigned Size = + CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static Intrinsic::ID @@ -11625,15 +11637,6 @@ return Result; } -TargetLowering::AtomicExpansionKind -RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( - AtomicCmpXchgInst *CI) const { - unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); - if (Size == 8 || Size == 16) - return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; -} - Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -201,7 +201,8 @@ return true; } - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl& Results, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1397,12 +1397,17 @@ // TargetLowering Implementation //===----------------------------------------------------------------------===// -TargetLowering::AtomicExpansionKind SparcTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->getOperation() == AtomicRMWInst::Xchg && - AI->getType()->getPrimitiveSizeInBits() == 32) - return AtomicExpansionKind::None; // Uses xchg instruction +TargetLoweringBase::AtomicExpansionKind +SparcTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + if (AI->getOperation() == AtomicRMWInst::Xchg && + AI->getType()->getPrimitiveSizeInBits() == 32) + return AtomicExpansionKind::None; // Uses xchg instruction - return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::CmpXChg; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } /// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -112,7 
+112,7 @@ Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; ISD::NodeType getExtendForAtomicOps() const override { return ISD::ANY_EXTEND; } diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1120,17 +1120,21 @@ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -TargetLowering::AtomicExpansionKind -VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // We have TS1AM implementation for i8/i16/i32/i64, so use it. - if (AI->getOperation() == AtomicRMWInst::Xchg) { - return AtomicExpansionKind::None; - } - // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. +TargetLoweringBase::AtomicExpansionKind +VETargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // We have TS1AM implementation for i8/i16/i32/i64, so use it. + if (AI->getOperation() == AtomicRMWInst::Xchg) { + return AtomicExpansionKind::None; + } + // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. - // Otherwise, expand it using compare and exchange instruction to not call - // __sync_fetch_and_* functions. - return AtomicExpansionKind::CmpXChg; + // Otherwise, expand it using compare and exchange instruction to not call + // __sync_fetch_and_* functions. + return AtomicExpansionKind::CmpXChg; + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -53,7 +53,8 @@ /// right decision when generating code for different targets. 
const WebAssemblySubtarget *Subtarget; - AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicInstInIR(Instruction *I) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -362,21 +362,25 @@ return TargetLowering::getPointerMemTy(DL, AS); } -TargetLowering::AtomicExpansionKind -WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - // We have wasm instructions for these - switch (AI->getOperation()) { - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - case AtomicRMWInst::And: - case AtomicRMWInst::Or: - case AtomicRMWInst::Xor: - case AtomicRMWInst::Xchg: - return AtomicExpansionKind::None; - default: - break; +TargetLoweringBase::AtomicExpansionKind +WebAssemblyTargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *AI = dyn_cast(I)) { + // We have wasm instructions for these + switch (AI->getOperation()) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: + return AtomicExpansionKind::None; + default: + break; + } + return AtomicExpansionKind::CmpXChg; } - return AtomicExpansionKind::CmpXChg; + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1623,11 +1623,8 @@ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + shouldExpandAtomicInstInIR(Instruction *I) const override; + TargetLoweringBase::AtomicExpansionKind shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30472,38 +30472,78 @@ } TargetLoweringBase::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { - Type *MemType = SI->getValueOperand()->getType(); +X86TargetLowering::shouldExpandAtomicInstInIR(Instruction *I) const { + if (auto *SI = dyn_cast(I)) { + Type *MemType = SI->getValueOperand()->getType(); - bool NoImplicitFloatOps = - SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); - if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && - !Subtarget.useSoftFloat() && !NoImplicitFloatOps && - (Subtarget.hasSSE1() || Subtarget.hasX87())) - return AtomicExpansionKind::None; + bool NoImplicitFloatOps = + SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); + if 
(MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && + (Subtarget.hasSSE1() || Subtarget.hasX87())) + return AtomicExpansionKind::None; - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand - : AtomicExpansionKind::None; -} + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; + } -// Note: this turns large loads into lock cmpxchg8b/16b. -// TODO: In 32-bit mode, use MOVLPS when SSE1 is available? -TargetLowering::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - Type *MemType = LI->getType(); + if (auto *LI = dyn_cast(I)) { + // Note: this turns large loads into lock cmpxchg8b/16b. + // TODO: In 32-bit mode, use MOVLPS when SSE1 is available? + Type *MemType = LI->getType(); - // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we - // can use movq to do the load. If we have X87 we can load into an 80-bit - // X87 register and store it to a stack temporary. - bool NoImplicitFloatOps = - LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); - if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && - !Subtarget.useSoftFloat() && !NoImplicitFloatOps && - (Subtarget.hasSSE1() || Subtarget.hasX87())) - return AtomicExpansionKind::None; + // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we + // can use movq to do the load. If we have X87 we can load into an 80-bit + // X87 register and store it to a stack temporary. + bool NoImplicitFloatOps = + LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); + if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && + (Subtarget.hasSSE1() || Subtarget.hasX87())) + return AtomicExpansionKind::None; - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; + } + + if (auto *AI = dyn_cast(I)) { + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; + Type *MemType = AI->getType(); + + // If the operand is too big, we must see if cmpxchg8/16b is available + // and default to library calls otherwise. + if (MemType->getPrimitiveSizeInBits() > NativeWidth) { + return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; + } + + AtomicRMWInst::BinOp Op = AI->getOperation(); + switch (Op) { + default: + llvm_unreachable("Unknown atomic operation"); + case AtomicRMWInst::Xchg: + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + // It's better to use xadd, xsub or xchg for these in all cases. + return AtomicExpansionKind::None; + case AtomicRMWInst::Or: + case AtomicRMWInst::And: + case AtomicRMWInst::Xor: + return shouldExpandLogicAtomicRMWInIR(AI); + case AtomicRMWInst::Nand: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + // These always require a non-trivial set of data operations on x86. We + // must use a cmpxchg loop. + return AtomicExpansionKind::CmpXChg; + } + } + + return TargetLoweringBase::shouldExpandAtomicInstInIR(I); } TargetLowering::AtomicExpansionKind @@ -30565,44 +30605,6 @@ AI->eraseFromParent(); } -TargetLowering::AtomicExpansionKind -X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned NativeWidth = Subtarget.is64Bit() ? 
64 : 32; - Type *MemType = AI->getType(); - - // If the operand is too big, we must see if cmpxchg8/16b is available - // and default to library calls otherwise. - if (MemType->getPrimitiveSizeInBits() > NativeWidth) { - return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; - } - - AtomicRMWInst::BinOp Op = AI->getOperation(); - switch (Op) { - default: - llvm_unreachable("Unknown atomic operation"); - case AtomicRMWInst::Xchg: - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - // It's better to use xadd, xsub or xchg for these in all cases. - return AtomicExpansionKind::None; - case AtomicRMWInst::Or: - case AtomicRMWInst::And: - case AtomicRMWInst::Xor: - return shouldExpandLogicAtomicRMWInIR(AI); - case AtomicRMWInst::Nand: - case AtomicRMWInst::Max: - case AtomicRMWInst::Min: - case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: - case AtomicRMWInst::FAdd: - case AtomicRMWInst::FSub: - // These always require a non-trivial set of data operations on x86. We must - // use a cmpxchg loop. - return AtomicExpansionKind::CmpXChg; - } -} - LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; @@ -44387,7 +44389,7 @@ // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might // get split by legalization. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST && - CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && + CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && TLI.isTypeLegal(VT.getScalarType())) { EVT ExtCondVT = VT.changeVectorElementTypeToInteger(); if (SDValue ExtCond = combineToExtendBoolVectorInReg(