Index: llvm/include/llvm/CodeGen/MachineLoopInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -71,7 +71,10 @@
   /// I.e., all virtual register operands are defined outside of the loop,
   /// physical registers aren't accessed explicitly, and there are no side
   /// effects that aren't captured by the operands or other flags.
-  bool isLoopInvariant(MachineInstr &I) const;
+  bool isLoopInvariant(MachineInstr &MI) const;
+
+  /// Return true if MI's result feeds a PHI in the loop or in an exit block.
+  bool hasLoopPHIUse(const MachineInstr *MI) const;
 
   void dump() const;
 
Index: llvm/lib/CodeGen/MachineLICM.cpp
===================================================================
--- llvm/lib/CodeGen/MachineLICM.cpp
+++ llvm/lib/CodeGen/MachineLICM.cpp
@@ -214,8 +214,6 @@
 
     bool IsLoopInvariantInst(MachineInstr &I);
 
-    bool HasLoopPHIUse(const MachineInstr *MI) const;
-
     bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                                Register Reg) const;
 
@@ -995,41 +993,6 @@
   return CurLoop->isLoopInvariant(I);
 }
 
-/// Return true if the specified instruction is used by a phi node and hoisting
-/// it could cause a copy to be inserted.
-bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
-  SmallVector<const MachineInstr*, 8> Work(1, MI);
-  do {
-    MI = Work.pop_back_val();
-    for (const MachineOperand &MO : MI->operands()) {
-      if (!MO.isReg() || !MO.isDef())
-        continue;
-      Register Reg = MO.getReg();
-      if (!Register::isVirtualRegister(Reg))
-        continue;
-      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
-        // A PHI may cause a copy to be inserted.
-        if (UseMI.isPHI()) {
-          // A PHI inside the loop causes a copy because the live range of Reg is
-          // extended across the PHI.
-          if (CurLoop->contains(&UseMI))
-            return true;
-          // A PHI in an exit block can cause a copy to be inserted if the PHI
-          // has multiple predecessors in the loop with different values.
-          // For now, approximate by rejecting all exit blocks.
-          if (isExitBlock(UseMI.getParent()))
-            return true;
-          continue;
-        }
-        // Look past copies as well.
-        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
-          Work.push_back(&UseMI);
-      }
-    }
-  } while (!Work.empty());
-  return false;
-}
-
 /// Compute operand latency between a def of 'Reg' and an use in the current
 /// loop, return true if the target considered it high.
 bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
@@ -1148,7 +1111,7 @@
     return true;
 
   bool CheapInstr = IsCheapInstruction(MI);
-  bool CreatesCopy = HasLoopPHIUse(&MI);
+  bool CreatesCopy = CurLoop->hasLoopPHIUse(&MI);
 
   // Don't hoist a cheap instruction if it would create a copy in the loop.
   if (CheapInstr && CreatesCopy) {
Index: llvm/lib/CodeGen/MachineLoopInfo.cpp
===================================================================
--- llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -149,13 +149,13 @@
   return Preheader;
 }
 
-bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
-  MachineFunction *MF = I.getParent()->getParent();
+bool MachineLoop::isLoopInvariant(MachineInstr &MI) const {
+  MachineFunction *MF = MI.getParent()->getParent();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   // The instruction is loop invariant if all of its operands are.
-  for (const MachineOperand &MO : I.operands()) {
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg())
       continue;
 
@@ -172,7 +172,7 @@
         // However, if the physreg is known to always be caller saved/restored
         // then this use is safe to hoist.
         if (!MRI->isConstantPhysReg(Reg) &&
-            !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+            !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF())))
           return false;
         // Otherwise it's safe to move.
         continue;
@@ -202,6 +202,45 @@
   return true;
 }
 
+/// Return true if the specified instruction is used by a phi node and hoisting
+/// it could cause a copy to be inserted.
+bool MachineLoop::hasLoopPHIUse(const MachineInstr *MI) const {
+  const MachineFunction *MF = MI->getParent()->getParent();
+  const MachineRegisterInfo *MRI = &MF->getRegInfo();
+  SmallVector<const MachineInstr *, 8> Work(1, MI);
+  do {
+    MI = Work.pop_back_val();
+    for (const MachineOperand &MO : MI->operands()) {
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      Register Reg = MO.getReg();
+      if (!Register::isVirtualRegister(Reg))
+        continue;
+      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+        // A PHI may cause a copy to be inserted.
+        if (UseMI.isPHI()) {
+          // A PHI inside the loop causes a copy because the live range of Reg is
+          // extended across the PHI.
+          if (contains(&UseMI))
+            return true;
+          // A PHI in an exit block can cause a copy if it has multiple in-loop
+          // predecessors with different values; approximate by rejecting all
+          // exit blocks. UseMI is outside the loop here, so its block is an
+          // exit block iff one of its predecessors is inside the loop.
+          if (any_of(UseMI.getParent()->predecessors(),
+                     [this](const auto *Pred) { return contains(Pred); }))
+            return true;
+          continue;
+        }
+        // Look past copies as well.
+        if (UseMI.isCopy() && contains(&UseMI))
+          Work.push_back(&UseMI);
+      }
+    }
+  } while (!Work.empty());
+  return false;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void MachineLoop::dump() const {
   print(dbgs());
Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -227,6 +227,12 @@
     void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                                 SmallVectorImpl<MachineInstr *> &Candidates);
     bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+    bool IsSafeToMove(MachineLoop *L, MachineInstr &I,
+                      MachineBasicBlock *SinkTo);
+    bool AreAliased(MachineInstr &First, MachineInstr &Second,
+                    MachineBasicBlock *From, MachineBasicBlock *To,
+                    DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                    bool &SawStore, bool &HasAliasedStore);
 
     bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                               MachineBasicBlock *MBB,
@@ -352,24 +358,6 @@
   return true;
 }
 
-/// Return true if this machine instruction loads from global offset table or
-/// constant pool.
-static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
-  assert(MI.mayLoad() && "Expected MI that loads!");
-
-  // If we lost memory operands, conservatively assume that the instruction
-  // reads from everything..
-  if (MI.memoperands_empty())
-    return true;
-
-  for (MachineMemOperand *MemOp : MI.memoperands())
-    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
-      if (PSV->isGOT() || PSV->isConstantPool())
-        return true;
-
-  return false;
-}
-
 void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
     SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
@@ -379,27 +367,28 @@
                            "target\n");
       continue;
     }
-    if (!L->isLoopInvariant(MI)) {
+    // Instructions that explicitly access physical registers are generally not
+    // loop invariant; this happens e.g. when the preheader is the entry block
+    // and holds copies of function arguments passed in registers. Values that
+    // feed a PHI in the loop or in an exit block are skipped as well.
+    if (!L->isLoopInvariant(MI) || L->hasLoopPHIUse(&MI)) {
       LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
       continue;
     }
-    bool DontMoveAcrossStore = true;
-    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
-      continue;
-    }
-    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
-      continue;
-    }
     if (MI.isConvergent())
       continue;
 
+    // Skip instructions that don't produce values, like branches and certain
+    // store instructions (that e.g. don't post-increment).
     const MachineOperand &MO = MI.getOperand(0);
-    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef()) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not define a value.\n");
       continue;
-    if (!MRI->hasOneDef(MO.getReg()))
+    }
+    if (!MRI->hasOneDef(MO.getReg())) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not have 1 def.\n");
       continue;
+    }
 
     LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
     Candidates.push_back(&MI);
@@ -470,8 +459,13 @@
       // of a def-use chain, if there is any.
       for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
         MachineInstr *I = *It;
+
+        // TODO: This is conservative because we bail as soon as we find one
+        // instruction that cannot be sunk. Better is to do this per def-use
+        // chain, so we try a next chain if one fails.
         if (!SinkIntoLoop(L, *I))
           break;
+
         EverMadeChange = true;
         ++NumLoopSunk;
       }
@@ -1155,29 +1149,10 @@
       }
 
       for (MachineInstr &I : *BB) {
-        // Treat as alias conservatively for a call or an ordered memory
-        // operation.
-        if (I.isCall() || I.hasOrderedMemoryRef()) {
-          for (auto *DomBB : HandledDomBlocks) {
-            if (DomBB != BB && DT->dominates(DomBB, BB))
-              HasStoreCache[std::make_pair(DomBB, To)] = true;
-            else if(DomBB != BB && DT->dominates(BB, DomBB))
-              HasStoreCache[std::make_pair(From, DomBB)] = true;
-          }
-          HasStoreCache[BlockPair] = true;
+        bool Aliased = AreAliased(I, MI, From, To, HandledDomBlocks, SawStore,
+                                  HasAliasedStore);
+        if (Aliased && (I.isCall() || I.hasOrderedMemoryRef()))
           return true;
-        }
-
-        if (I.mayStore()) {
-          SawStore = true;
-          // We still have chance to sink MI if all stores between are not
-          // aliased to MI.
-          // Cache all store instructions, so that we don't need to go through
-          // all From reachable blocks for next load instruction.
-          if (I.mayAlias(AA, MI, false))
-            HasAliasedStore = true;
-          StoreInstrCache[BlockPair].push_back(&I);
-        }
       }
     }
   }
@@ -1187,6 +1162,87 @@
   return HasAliasedStore;
 }
 
+/// Return true if \p First is a call or ordered memory access, or if an aliased
+/// store has been seen so far; updates the store caches for (From, To).
+bool MachineSinking::AreAliased(MachineInstr &First, MachineInstr &Second,
+    MachineBasicBlock *From, MachineBasicBlock *To,
+    DenseSet<MachineBasicBlock *> HandledDomBlocks, bool &SawStore,
+    bool &HasAliasedStore) {
+  MachineBasicBlock *BB = First.getParent();
+  auto BlockPair = std::make_pair(From, To);
+
+  // Treat as alias conservatively for a call or an ordered memory operation.
+  if (First.isCall() || First.hasOrderedMemoryRef()) {
+    for (auto *DomBB : HandledDomBlocks) {
+      if (DomBB != BB && DT->dominates(DomBB, BB))
+        HasStoreCache[std::make_pair(DomBB, To)] = true;
+      else if (DomBB != BB && DT->dominates(BB, DomBB))
+        HasStoreCache[std::make_pair(From, DomBB)] = true;
+    }
+    HasStoreCache[BlockPair] = true;
+    return true;
+  }
+
+  if (First.mayStore()) {
+    SawStore = true;
+    // Second can still be moved if no store aliases it; remember every store
+    // so later queries for this block pair need not rescan the blocks.
+    if (First.mayAlias(AA, Second, false))
+      HasAliasedStore = true;
+    StoreInstrCache[BlockPair].push_back(&First);
+  }
+
+  // Do not cache "no store" here: this runs once per instruction, so a later
+  // instruction may still store. Only a caller that scanned everything can.
+  return HasAliasedStore;
+}
+
+/// Return true if \p I can be moved from its current block into \p SinkTo.
+bool MachineSinking::IsSafeToMove(MachineLoop *L, MachineInstr &I,
+                                  MachineBasicBlock *SinkTo) {
+  if (LI->getLoopFor(SinkTo) != L)
+    return false;
+
+  MachineBasicBlock *FromBB = I.getParent();
+  bool AnyAliasedStore = false;
+  bool AnyStore = false;
+
+  // 1) Scan the instructions that follow I up to the end of its own block.
+  auto Stop = FromBB->instr_end();
+  for (auto Next = std::next(I.getIterator()); Next != Stop; ++Next) {
+    if (AreAliased(*Next, I, FromBB, SinkTo, {}, AnyStore, AnyAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink:   Alias pair found!\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "LoopSink:   Not aliased with : " << *Next);
+  }
+
+  // MachineInstr::isSafeToMove performs no alias analysis; it looks at the
+  // instruction kind, side effects, etc. NOTE(review): the store flag is reset
+  // right before the call, so stores seen in 1) are not reported to it --
+  // presumably because 1) and 2) cover them via alias analysis; confirm.
+  AnyStore = false;
+  if (!I.isSafeToMove(AA, AnyStore)) {
+    LLVM_DEBUG(dbgs() << "LoopSink:   Not safe to move!\n");
+    return false;
+  }
+
+  // 2) Scan every instruction of every loop block for aliases with I.
+  for (MachineBasicBlock *LoopBB : L->blocks()) {
+    for (MachineInstr &LoopMI : *LoopBB) {
+      if (AreAliased(LoopMI, I, FromBB, SinkTo, {}, AnyStore,
+                     AnyAliasedStore)) {
+        LLVM_DEBUG(dbgs() << "LoopSink:   Alias found in loop block: "
+                          << LoopMI);
+        return false;
+      }
+      LLVM_DEBUG(dbgs() << "LoopSink:   Not aliased with loop ins: " << LoopMI);
+    }
+  }
+  LLVM_DEBUG(dbgs() << "LoopSink:   Instruction not aliased, safe to move!\n");
+  return true;
+}
+
 /// Sink instructions into loops if profitable. This especially tries to prevent
 /// register spills caused by register pressure if there is little to no
 /// overhead moving instructions into loops.
@@ -1209,12 +1265,7 @@
     // FIXME: Come up with a proper cost model that estimates whether sinking
     // the instruction (and thus possibly executing it on every loop
     // iteration) is more expensive than a register.
-    // For now assumes that copies are cheap and thus almost always worth it.
-    if (!MI.isCopy()) {
-      LLVM_DEBUG(dbgs() << "LoopSink:   Use is not a copy\n");
-      CanSink = false;
-      break;
-    }
+
     if (!SinkBlock) {
       SinkBlock = MI.getParent();
       LLVM_DEBUG(dbgs() << "LoopSink:   Setting sink block to: "
@@ -1243,6 +1294,10 @@
     LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
     return false;
   }
+  if (!IsSafeToMove(L, I, SinkBlock)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
 
   LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
   SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
Index: llvm/test/CodeGen/AArch64/loop-sink.mir
===================================================================
--- llvm/test/CodeGen/AArch64/loop-sink.mir
+++ llvm/test/CodeGen/AArch64/loop-sink.mir
@@ -1,11 +1,13 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=machine-sink  -sink-insts-to-avoid-spills %s -o - 2>&1 | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=machine-sink  -sink-insts-to-avoid-spills -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64"
 
   @A = external dso_local global [100 x i32], align 4
   %struct.A = type { i32, i32, i32, i32, i32, i32 }
+  @G = external dso_local local_unnamed_addr global i32, align 4
 
   define void @cant_sink_adds_call_in_block(i8* nocapture readonly %input, %struct.A* %a) {
     %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
@@ -129,7 +131,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define i32 @use_is_not_a_copy(i32 %n) {
+  define i32 @do_sink_use_is_not_a_copy(i32 %n) {
   entry:
     %cmp63 = icmp sgt i32 %n, 0
     br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
@@ -151,7 +153,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @sink_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @cant_sink_load_add_chain_loop_phi_use(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -177,63 +179,169 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @cant_sink_multi_block_loop_with_call(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
   entry:
-    %0 = load i32, i32* %read, align 4, !tbaa !6
-    %cmp10 = icmp sgt i32 %n, 0
-    br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+    %0 = load i32, i32* %read, align 4
+    store i32 %n, i32* %read, align 4
+    %cmp12 = icmp sgt i32 %n, 0
+    br i1 %cmp12, label %for.body.lr.ph, label %for.cond.cleanup
+
+  for.body.lr.ph:                                   ; preds = %entry
+    %1 = load i32, i32* @G, align 4
+    %2 = icmp eq i32 %1, 0
+    br i1 %2, label %for.body.us.preheader, label %for.body.preheader
+
+  for.body.preheader:                               ; preds = %for.body.lr.ph
+    %3 = add i32 %0, 42
+    br label %for.body
+
+  for.body.us.preheader:                            ; preds = %for.body.lr.ph
+    %4 = add i32 %n, -1
+    %5 = add i32 %0, 42
+    br label %for.body.us
+
+  for.body.us:                                      ; preds = %for.body.us.preheader, %for.inc.us.for.body.us_crit_edge
+    %lsr.iv2 = phi i32 [ %5, %for.body.us.preheader ], [ %lsr.iv.next3, %for.inc.us.for.body.us_crit_edge ]
+    %lsr.iv = phi i32 [ %4, %for.body.us.preheader ], [ %lsr.iv.next, %for.inc.us.for.body.us_crit_edge ]
+    %6 = phi i32 [ %.pre, %for.inc.us.for.body.us_crit_edge ], [ 0, %for.body.us.preheader ]
+    %sum.013.us = phi i32 [ %sum.1.us, %for.inc.us.for.body.us_crit_edge ], [ %n, %for.body.us.preheader ]
+    %tobool.not.us = icmp eq i32 %6, 0
+    br i1 %tobool.not.us, label %if.else.us, label %if.then.us
+
+  if.then.us:                                       ; preds = %for.body.us
+    %div.us = sdiv i32 %sum.013.us, %lsr.iv2
+    br label %for.inc.us
+
+  if.else.us:                                       ; preds = %for.body.us
+    tail call void @H() #2
+    br label %for.inc.us
+
+  for.inc.us:                                       ; preds = %if.else.us, %if.then.us
+    %sum.1.us = phi i32 [ %div.us, %if.then.us ], [ %sum.013.us, %if.else.us ]
+    %exitcond.not = icmp eq i32 %lsr.iv, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.inc.us.for.body.us_crit_edge, !llvm.loop !10
+
+  for.inc.us.for.body.us_crit_edge:                 ; preds = %for.inc.us
+    %.pre = load i32, i32* @G, align 4
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %lsr.iv.next3 = add i32 %lsr.iv2, 1
+    br label %for.body.us
+
+  for.cond.cleanup:                                 ; preds = %for.body, %for.inc.us, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %sum.1.us, %for.inc.us ], [ %div, %for.body ]
+    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv6 = phi i32 [ %3, %for.body.preheader ], [ %lsr.iv.next7, %for.body ]
+    %lsr.iv4 = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next5, %for.body ]
+    %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+    %div = sdiv i32 %sum.013, %lsr.iv6
+    %lsr.iv.next5 = add i32 %lsr.iv4, -1
+    %lsr.iv.next7 = add i32 %lsr.iv6, 1
+    %exitcond17.not = icmp eq i32 %lsr.iv.next5, 0
+    br i1 %exitcond17.not, label %for.cond.cleanup, label %for.body
+  }
+
+  define dso_local void @do_sink_load_add(float* noalias nocapture readonly %read, float* noalias nocapture %write, float* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
+  entry:
+    %0 = load float, float* %read, align 4, !tbaa !6
+    %add = fadd fast float %0, 4.200000e+01
+    %cmp8 = icmp sgt i32 %n, 0
+    br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
 
   for.body.preheader:                               ; preds = %entry
-    %1 = add i32 %0, 42
-    store i32 43, i32* %store, align 4, !tbaa !6
     br label %for.body
 
   for.cond.cleanup:                                 ; preds = %for.body, %entry
-    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
-    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]
+    store float %sum.0.lcssa, float* %write, align 4
     ret void
 
   for.body:                                         ; preds = %for.body.preheader, %for.body
-    %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
     %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
-    %sum.011 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
-    %div = sdiv i32 %sum.011, %lsr.iv1
+    %sum.09 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %add1 = fadd fast float %add, %sum.09
     %lsr.iv.next = add i32 %lsr.iv, -1
-    %lsr.iv.next2 = add i32 %lsr.iv1, 1
     %exitcond.not = icmp eq i32 %lsr.iv.next, 0
-    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @aliased_store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @do_sink_no_aliased_store(float* noalias nocapture readonly %read, float* noalias nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 {
   entry:
-    %0 = load i32, i32* %read, align 4, !tbaa !6
-    %cmp10 = icmp sgt i32 %n, 0
-    br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+    %cmp6 = icmp sgt i32 %n, 0
+    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
 
   for.body.preheader:                               ; preds = %entry
-    %1 = add i32 %0, 42
-    store i32 43, i32* %read, align 4, !tbaa !6
+    %0 = load float, float* %read, align 4
+    store float %m, float* %store, align 4
     br label %for.body
 
   for.cond.cleanup:                                 ; preds = %for.body, %entry
-    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
-    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+    store float %sum.0.lcssa, float* %write, align 4
     ret void
 
   for.body:                                         ; preds = %for.body.preheader, %for.body
-    %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
     %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
-    %sum.011 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
-    %div = sdiv i32 %sum.011, %lsr.iv1
+    %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %add = fadd fast float %sum.07, %0
     %lsr.iv.next = add i32 %lsr.iv, -1
-    %lsr.iv.next2 = add i32 %lsr.iv1, 1
     %exitcond.not = icmp eq i32 %lsr.iv.next, 0
-    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
+  define dso_local void @cant_sink_load_aliased_store(float* nocapture readonly %read, float* nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 {
+  entry:
+    %cmp6 = icmp sgt i32 %n, 0
+    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    %0 = load float, float* %read, align 4
+    store float %m, float* %store, align 4
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+    store float %sum.0.lcssa, float* %write, align 4
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %add = fadd fast float %sum.07, %0
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  }
+
+  define dso_local void @cant_sink_aliased_store_in_loop(float* nocapture readonly %read, float* nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 {
+  entry:
+    %0 = load float, float* %read, align 4, !tbaa !6
+    %cmp6 = icmp sgt i32 %n, 0
+    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+    store float %sum.0.lcssa, float* %write, align 4
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %add = fadd fast float %sum.07, %0
+    store float %m, float* %store, align 4
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  }
 
   declare i32 @use(i32)
   declare void @_Z6assignPj(i32*)
+  declare void @H()
 
   !6 = !{!7, !7, i64 0}
   !7 = !{!"int", !8, i64 0}
@@ -871,7 +979,7 @@
 
 ...
 ---
-name:            use_is_not_a_copy
+name:            do_sink_use_is_not_a_copy
 alignment:       4
 exposesReturnsTwice: false
 legalized:       false
@@ -921,7 +1029,7 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: use_is_not_a_copy
+  ; CHECK-LABEL: name: do_sink_use_is_not_a_copy
   ; CHECK: bb.0.entry:
   ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK:   liveins: $w0
@@ -931,8 +1039,6 @@
   ; CHECK:   B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
-  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK:   B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK:   [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
@@ -942,6 +1048,8 @@
   ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
   ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
   ; CHECK:   [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
   ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
@@ -983,7 +1091,7 @@
 
 ...
 ---
-name:            sink_add
+name:            cant_sink_load_add_chain_loop_phi_use
 alignment:       16
 exposesReturnsTwice: false
 legalized:       false
@@ -1041,7 +1149,7 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: sink_add
+  ; CHECK-LABEL: name: cant_sink_load_add_chain_loop_phi_use
   ; CHECK: bb.0.entry:
   ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK:   liveins: $x0, $x1, $w2
@@ -1115,7 +1223,7 @@
 
 ...
 ---
-name:            store_after_add
+name:            cant_sink_multi_block_loop_with_call
 alignment:       16
 exposesReturnsTwice: false
 legalized:       false
@@ -1127,29 +1235,286 @@
 registers:
   - { id: 0, class: gpr32sp, preferred-register: '' }
   - { id: 1, class: gpr32all, preferred-register: '' }
-  - { id: 2, class: gpr32, preferred-register: '' }
-  - { id: 3, class: gpr32common, preferred-register: '' }
-  - { id: 4, class: gpr32sp, preferred-register: '' }
-  - { id: 5, class: gpr32, preferred-register: '' }
-  - { id: 6, class: gpr32all, preferred-register: '' }
-  - { id: 7, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32all, preferred-register: '' }
+  - { id: 3, class: gpr32all, preferred-register: '' }
+  - { id: 4, class: gpr32common, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: gpr32, preferred-register: '' }
   - { id: 8, class: gpr32all, preferred-register: '' }
-  - { id: 9, class: gpr64common, preferred-register: '' }
-  - { id: 10, class: gpr64common, preferred-register: '' }
-  - { id: 11, class: gpr64common, preferred-register: '' }
-  - { id: 12, class: gpr32common, preferred-register: '' }
-  - { id: 13, class: gpr32common, preferred-register: '' }
+  - { id: 9, class: gpr32all, preferred-register: '' }
+  - { id: 10, class: gpr32all, preferred-register: '' }
+  - { id: 11, class: gpr32all, preferred-register: '' }
+  - { id: 12, class: gpr32all, preferred-register: '' }
+  - { id: 13, class: gpr32, preferred-register: '' }
+  - { id: 14, class: gpr32common, preferred-register: '' }
+  - { id: 15, class: gpr32sp, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
+  - { id: 17, class: gpr32all, preferred-register: '' }
+  - { id: 18, class: gpr32all, preferred-register: '' }
+  - { id: 19, class: gpr32all, preferred-register: '' }
+  - { id: 20, class: gpr64common, preferred-register: '' }
+  - { id: 21, class: gpr64common, preferred-register: '' }
+  - { id: 22, class: gpr64, preferred-register: '' }
+  - { id: 23, class: gpr32common, preferred-register: '' }
+  - { id: 24, class: gpr32common, preferred-register: '' }
+  - { id: 25, class: gpr32, preferred-register: '' }
+  - { id: 26, class: gpr64common, preferred-register: '' }
+  - { id: 27, class: gpr32, preferred-register: '' }
+  - { id: 28, class: gpr32sp, preferred-register: '' }
+  - { id: 29, class: gpr32, preferred-register: '' }
+  - { id: 30, class: gpr32, preferred-register: '' }
+  - { id: 31, class: gpr32sp, preferred-register: '' }
+  - { id: 32, class: gpr32all, preferred-register: '' }
+  - { id: 33, class: gpr32, preferred-register: '' }
+  - { id: 34, class: gpr32sp, preferred-register: '' }
+  - { id: 35, class: gpr32all, preferred-register: '' }
+  - { id: 36, class: gpr32, preferred-register: '' }
+  - { id: 37, class: gpr64common, preferred-register: '' }
+  - { id: 38, class: gpr32, preferred-register: '' }
+  - { id: 39, class: gpr32, preferred-register: '' }
+  - { id: 40, class: gpr32sp, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%20' }
+  - { reg: '$x1', virtual-reg: '%21' }
+  - { reg: '$w3', virtual-reg: '%23' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: cant_sink_multi_block_loop_with_call
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.9(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $w3
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 11, %bb.9, implicit $nzcv
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1.for.body.lr.ph:
+  ; CHECK:   successors: %bb.3(0x30000000), %bb.2(0x50000000)
+  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @G
+  ; CHECK:   [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0)
+  ; CHECK:   CBZW killed [[LDRWui1]], %bb.3
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2.for.body.preheader:
+  ; CHECK:   successors: %bb.10(0x80000000)
+  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK:   B %bb.10
+  ; CHECK: bb.3.for.body.us.preheader:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBSWri]]
+  ; CHECK:   [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY $wzr
+  ; CHECK:   [[COPY6:%[0-9]+]]:gpr32all = COPY [[COPY5]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK: bb.4.for.body.us:
+  ; CHECK:   successors: %bb.6(0x30000000), %bb.5(0x50000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY7]], %bb.3, %12, %bb.8
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.3, %11, %bb.8
+  ; CHECK:   [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY6]], %bb.3, %10, %bb.8
+  ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.3, %9, %bb.8
+  ; CHECK:   CBZW [[PHI2]], %bb.6
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.5.if.then.us:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI]]
+  ; CHECK:   [[COPY8:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+  ; CHECK:   B %bb.7
+  ; CHECK: bb.6.if.else.us:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK:   BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK: bb.7.for.inc.us:
+  ; CHECK:   successors: %bb.9(0x04000000), %bb.8(0x7c000000)
+  ; CHECK:   [[PHI4:%[0-9]+]]:gpr32all = PHI [[COPY8]], %bb.5, [[PHI3]], %bb.6
+  ; CHECK:   CBZW [[PHI1]], %bb.9
+  ; CHECK:   B %bb.8
+  ; CHECK: bb.8.for.inc.us.for.body.us_crit_edge:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[LDRWui2:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0)
+  ; CHECK:   [[COPY9:%[0-9]+]]:gpr32all = COPY [[LDRWui2]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def dead $nzcv
+  ; CHECK:   [[COPY10:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK:   [[ADDWri2:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+  ; CHECK:   [[COPY11:%[0-9]+]]:gpr32all = COPY [[ADDWri2]]
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.9.for.cond.cleanup:
+  ; CHECK:   [[PHI5:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %17, %bb.10, [[PHI4]], %bb.7
+  ; CHECK:   STRWui [[PHI5]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   RET_ReallyLR
+  ; CHECK: bb.10.for.body:
+  ; CHECK:   successors: %bb.9(0x04000000), %bb.10(0x7c000000)
+  ; CHECK:   [[PHI6:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.2, %19, %bb.10
+  ; CHECK:   [[PHI7:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.2, %18, %bb.10
+  ; CHECK:   [[PHI8:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.2, %17, %bb.10
+  ; CHECK:   [[SDIVWr1:%[0-9]+]]:gpr32 = SDIVWr [[PHI8]], [[PHI6]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:gpr32all = COPY [[SDIVWr1]]
+  ; CHECK:   [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[PHI7]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY13:%[0-9]+]]:gpr32all = COPY [[SUBSWri2]]
+  ; CHECK:   [[ADDWri3:%[0-9]+]]:gpr32sp = ADDWri [[PHI6]], 1, 0
+  ; CHECK:   [[COPY14:%[0-9]+]]:gpr32all = COPY [[ADDWri3]]
+  ; CHECK:   Bcc 0, %bb.9, implicit $nzcv
+  ; CHECK:   B %bb.10
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.9(0x30000000)
+    liveins: $x0, $x1, $w3
+
+    %23:gpr32common = COPY $w3
+    %21:gpr64common = COPY $x1
+    %20:gpr64common = COPY $x0
+    %24:gpr32common = LDRWui %20, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRWui %23, %20, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %25:gpr32 = SUBSWri %23, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.9, implicit $nzcv
+    B %bb.1
+
+  bb.1.for.body.lr.ph:
+    successors: %bb.3(0x30000000), %bb.2(0x50000000)
+
+    %26:gpr64common = ADRP target-flags(aarch64-page) @G
+    %27:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6)
+    CBZW killed %27, %bb.3
+    B %bb.2
+
+  bb.2.for.body.preheader:
+    successors: %bb.10(0x80000000)
+
+    %28:gpr32sp = ADDWri %24, 42, 0
+    %1:gpr32all = COPY %28
+    B %bb.10
+
+  bb.3.for.body.us.preheader:
+    successors: %bb.4(0x80000000)
+
+    %2:gpr32all = COPY %25
+    %34:gpr32sp = ADDWri %24, 42, 0
+    %35:gpr32all = COPY $wzr
+    %32:gpr32all = COPY %35
+    %3:gpr32all = COPY %34
+
+  bb.4.for.body.us:
+    successors: %bb.6(0x30000000), %bb.5(0x50000000)
+
+    %4:gpr32common = PHI %3, %bb.3, %12, %bb.8
+    %5:gpr32common = PHI %2, %bb.3, %11, %bb.8
+    %6:gpr32 = PHI %32, %bb.3, %10, %bb.8
+    %7:gpr32 = PHI %23, %bb.3, %9, %bb.8
+    CBZW %6, %bb.6
+    B %bb.5
+
+  bb.5.if.then.us:
+    successors: %bb.7(0x80000000)
+
+    %36:gpr32 = SDIVWr %7, %4
+    %8:gpr32all = COPY %36
+    B %bb.7
+
+  bb.6.if.else.us:
+    successors: %bb.7(0x80000000)
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+  bb.7.for.inc.us:
+    successors: %bb.9(0x04000000), %bb.8(0x7c000000)
+
+    %9:gpr32all = PHI %8, %bb.5, %7, %bb.6
+    CBZW %5, %bb.9
+    B %bb.8
+
+  bb.8.for.inc.us.for.body.us_crit_edge:
+    successors: %bb.4(0x80000000)
+
+    %38:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6)
+    %10:gpr32all = COPY %38
+    %39:gpr32 = SUBSWri %5, 1, 0, implicit-def dead $nzcv
+    %11:gpr32all = COPY %39
+    %40:gpr32sp = ADDWri %4, 1, 0
+    %12:gpr32all = COPY %40
+    B %bb.4
+
+  bb.9.for.cond.cleanup:
+    %13:gpr32 = PHI %23, %bb.0, %17, %bb.10, %9, %bb.7
+    STRWui %13, %21, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+
+  bb.10.for.body:
+    successors: %bb.9(0x04000000), %bb.10(0x7c000000)
+
+    %14:gpr32common = PHI %1, %bb.2, %19, %bb.10
+    %15:gpr32sp = PHI %23, %bb.2, %18, %bb.10
+    %16:gpr32 = PHI %23, %bb.2, %17, %bb.10
+    %29:gpr32 = SDIVWr %16, %14
+    %17:gpr32all = COPY %29
+    %30:gpr32 = SUBSWri %15, 1, 0, implicit-def $nzcv
+    %18:gpr32all = COPY %30
+    %31:gpr32sp = ADDWri %14, 1, 0
+    %19:gpr32all = COPY %31
+    Bcc 0, %bb.9, implicit $nzcv
+    B %bb.10
+
+...
+---
+name:            do_sink_load_add
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr32sp, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32all, preferred-register: '' }
+  - { id: 6, class: gpr64common, preferred-register: '' }
+  - { id: 7, class: gpr64common, preferred-register: '' }
+  - { id: 8, class: gpr64, preferred-register: '' }
+  - { id: 9, class: gpr32common, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+  - { id: 11, class: fpr32, preferred-register: '' }
+  - { id: 12, class: gpr32, preferred-register: '' }
+  - { id: 13, class: fpr32, preferred-register: '' }
   - { id: 14, class: gpr32, preferred-register: '' }
-  - { id: 15, class: gpr32, preferred-register: '' }
-  - { id: 16, class: gpr32sp, preferred-register: '' }
-  - { id: 17, class: gpr32, preferred-register: '' }
-  - { id: 18, class: gpr32, preferred-register: '' }
-  - { id: 19, class: gpr32sp, preferred-register: '' }
+  - { id: 15, class: fpr32, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
 liveins:
-  - { reg: '$x0', virtual-reg: '%9' }
-  - { reg: '$x1', virtual-reg: '%10' }
-  - { reg: '$x2', virtual-reg: '%11' }
-  - { reg: '$w3', virtual-reg: '%12' }
+  - { reg: '$x0', virtual-reg: '%6' }
+  - { reg: '$x1', virtual-reg: '%7' }
+  - { reg: '$w3', virtual-reg: '%9' }
 frameInfo:
   isFrameAddressTaken: false
   isReturnAddressTaken: false
@@ -1176,87 +1541,212 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: store_after_add
+  ; CHECK-LABEL: name: do_sink_load_add
   ; CHECK: bb.0.entry:
-  ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
-  ; CHECK:   liveins: $x0, $x1, $x2, $w3
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $w3
   ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w3
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x2
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x1
-  ; CHECK:   [[COPY3:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
   ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
-  ; CHECK:   Bcc 11, %bb.2, implicit $nzcv
-  ; CHECK:   B %bb.1
+  ; CHECK:   Bcc 10, %bb.1, implicit $nzcv
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.2
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0)
-  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
-  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
-  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43
-  ; CHECK:   STRWui killed [[MOVi32imm]], [[COPY1]], 0 :: (store 4 into %ir.store, !tbaa !0)
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
   ; CHECK:   B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
-  ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
-  ; CHECK:   STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3
+  ; CHECK:   STRSui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
   ; CHECK:   RET_ReallyLR
   ; CHECK: bb.3.for.body:
   ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
-  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.1, %8, %bb.3
-  ; CHECK:   [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
-  ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
-  ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
-  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
-  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
-  ; CHECK:   [[COPY6:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
-  ; CHECK:   [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0
-  ; CHECK:   [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3
+  ; CHECK:   [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1109917696
+  ; CHECK:   [[COPY3:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK:   [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[LDRSui]], [[COPY3]]
+  ; CHECK:   [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[FADDSrr]], [[PHI2]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
   ; CHECK:   Bcc 0, %bb.2, implicit $nzcv
   ; CHECK:   B %bb.3
   bb.0.entry:
     successors: %bb.1(0x50000000), %bb.2(0x30000000)
-    liveins: $x0, $x1, $x2, $w3
+    liveins: $x0, $x1, $w3
+
+    %9:gpr32common = COPY $w3
+    %7:gpr64common = COPY $x1
+    %6:gpr64common = COPY $x0
+    %11:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6)
+    %12:gpr32 = MOVi32imm 1109917696
+    %13:fpr32 = COPY %12
+    %0:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed %11, killed %13
+    %10:fpr32 = FMOVS0
+    %14:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1
 
-    %12:gpr32common = COPY $w3
-    %11:gpr64common = COPY $x2
-    %10:gpr64common = COPY $x1
-    %9:gpr64common = COPY $x0
-    %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6)
-    %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %15:fpr32 = FMOVS0
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %1:fpr32 = PHI %10, %bb.0, %4, %bb.3
+    STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+    %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3
+    %3:fpr32 = PHI %15, %bb.1, %4, %bb.3
+    %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %0, %3
+    %16:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
+    %5:gpr32all = COPY %16
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.3
+
+...
+---
+name:            do_sink_no_aliased_store
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr32sp, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32all, preferred-register: '' }
+  - { id: 6, class: gpr64common, preferred-register: '' }
+  - { id: 7, class: gpr64common, preferred-register: '' }
+  - { id: 8, class: gpr64common, preferred-register: '' }
+  - { id: 9, class: gpr32common, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+  - { id: 11, class: fpr32, preferred-register: '' }
+  - { id: 12, class: gpr32, preferred-register: '' }
+  - { id: 13, class: fpr32, preferred-register: '' }
+  - { id: 14, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%6' }
+  - { reg: '$x1', virtual-reg: '%7' }
+  - { reg: '$x2', virtual-reg: '%8' }
+  - { reg: '$w3', virtual-reg: '%9' }
+  - { reg: '$s0', virtual-reg: '%10' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: do_sink_no_aliased_store
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $x2, $w3, $s0
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 10, %bb.1, implicit $nzcv
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.1.for.body.preheader:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0)
+  ; CHECK:   B %bb.3
+  ; CHECK: bb.2.for.cond.cleanup:
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3
+  ; CHECK:   STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   RET_ReallyLR
+  ; CHECK: bb.3.for.body:
+  ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3
+  ; CHECK:   [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK:   Bcc 0, %bb.2, implicit $nzcv
+  ; CHECK:   B %bb.3
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $x0, $x1, $x2, $w3, $s0
+
+    %10:fpr32 = COPY $s0
+    %9:gpr32common = COPY $w3
+    %8:gpr64common = COPY $x2
+    %7:gpr64common = COPY $x1
+    %6:gpr64common = COPY $x0
+    %11:fpr32 = FMOVS0
+    %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv
     Bcc 11, %bb.2, implicit $nzcv
     B %bb.1
 
   bb.1.for.body.preheader:
     successors: %bb.3(0x80000000)
 
-    %16:gpr32sp = ADDWri %13, 42, 0
-    %1:gpr32all = COPY %16
-    %14:gpr32 = MOVi32imm 43
-    STRWui killed %14, %11, 0 :: (store 4 into %ir.store, !tbaa !6)
+    %13:fpr32 = FMOVS0
+    %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6)
     B %bb.3
 
   bb.2.for.cond.cleanup:
-    %2:gpr32 = PHI %12, %bb.0, %6, %bb.3
-    STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6)
+    %1:fpr32 = PHI %11, %bb.0, %4, %bb.3
+    STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6)
     RET_ReallyLR
 
   bb.3.for.body:
     successors: %bb.2(0x04000000), %bb.3(0x7c000000)
 
-    %3:gpr32common = PHI %1, %bb.1, %8, %bb.3
-    %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3
-    %5:gpr32 = PHI %12, %bb.1, %6, %bb.3
-    %17:gpr32 = SDIVWr %5, %3
-    %6:gpr32all = COPY %17
-    %18:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv
-    %7:gpr32all = COPY %18
-    %19:gpr32sp = ADDWri %3, 1, 0
-    %8:gpr32all = COPY %19
+    %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3
+    %3:fpr32 = PHI %13, %bb.1, %4, %bb.3
+    %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0
+    %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
+    %5:gpr32all = COPY %14
     Bcc 0, %bb.2, implicit $nzcv
     B %bb.3
 
 ...
 ---
-name:            aliased_store_after_add
+name:            cant_sink_load_aliased_store
 alignment:       16
 exposesReturnsTwice: false
 legalized:       false
@@ -1266,31 +1756,27 @@
 tracksRegLiveness: true
 hasWinCFI:       false
 registers:
-  - { id: 0, class: gpr32sp, preferred-register: '' }
-  - { id: 1, class: gpr32all, preferred-register: '' }
-  - { id: 2, class: gpr32, preferred-register: '' }
-  - { id: 3, class: gpr32common, preferred-register: '' }
-  - { id: 4, class: gpr32sp, preferred-register: '' }
-  - { id: 5, class: gpr32, preferred-register: '' }
-  - { id: 6, class: gpr32all, preferred-register: '' }
-  - { id: 7, class: gpr32all, preferred-register: '' }
-  - { id: 8, class: gpr32all, preferred-register: '' }
-  - { id: 9, class: gpr64common, preferred-register: '' }
-  - { id: 10, class: gpr64common, preferred-register: '' }
-  - { id: 11, class: gpr64common, preferred-register: '' }
-  - { id: 12, class: gpr32common, preferred-register: '' }
-  - { id: 13, class: gpr32common, preferred-register: '' }
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr32sp, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32all, preferred-register: '' }
+  - { id: 6, class: gpr64common, preferred-register: '' }
+  - { id: 7, class: gpr64common, preferred-register: '' }
+  - { id: 8, class: gpr64common, preferred-register: '' }
+  - { id: 9, class: gpr32common, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+  - { id: 11, class: fpr32, preferred-register: '' }
+  - { id: 12, class: gpr32, preferred-register: '' }
+  - { id: 13, class: fpr32, preferred-register: '' }
   - { id: 14, class: gpr32, preferred-register: '' }
-  - { id: 15, class: gpr32, preferred-register: '' }
-  - { id: 16, class: gpr32sp, preferred-register: '' }
-  - { id: 17, class: gpr32, preferred-register: '' }
-  - { id: 18, class: gpr32, preferred-register: '' }
-  - { id: 19, class: gpr32sp, preferred-register: '' }
 liveins:
-  - { reg: '$x0', virtual-reg: '%9' }
-  - { reg: '$x1', virtual-reg: '%10' }
-  - { reg: '$x2', virtual-reg: '%11' }
-  - { reg: '$w3', virtual-reg: '%12' }
+  - { reg: '$x0', virtual-reg: '%6' }
+  - { reg: '$x1', virtual-reg: '%7' }
+  - { reg: '$x2', virtual-reg: '%8' }
+  - { reg: '$w3', virtual-reg: '%9' }
+  - { reg: '$s0', virtual-reg: '%10' }
 frameInfo:
   isFrameAddressTaken: false
   isReturnAddressTaken: false
@@ -1317,81 +1803,206 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: aliased_store_after_add
+  ; CHECK-LABEL: name: cant_sink_load_aliased_store
   ; CHECK: bb.0.entry:
-  ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
-  ; CHECK:   liveins: $x0, $x1, $x2, $w3
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w3
-  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x2
-  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x1
-  ; CHECK:   [[COPY3:%[0-9]+]]:gpr64common = COPY $x0
-  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
-  ; CHECK:   Bcc 11, %bb.2, implicit $nzcv
-  ; CHECK:   B %bb.1
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $x2, $w3, $s0
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 10, %bb.1, implicit $nzcv
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.2
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0)
-  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
-  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
-  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43
-  ; CHECK:   STRWui killed [[MOVi32imm]], [[COPY3]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0)
   ; CHECK:   B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
-  ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
-  ; CHECK:   STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3
+  ; CHECK:   STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0)
   ; CHECK:   RET_ReallyLR
   ; CHECK: bb.3.for.body:
   ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
-  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.1, %8, %bb.3
-  ; CHECK:   [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
-  ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
-  ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
-  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
-  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
-  ; CHECK:   [[COPY6:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
-  ; CHECK:   [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0
-  ; CHECK:   [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3
+  ; CHECK:   [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
   ; CHECK:   Bcc 0, %bb.2, implicit $nzcv
   ; CHECK:   B %bb.3
   bb.0.entry:
     successors: %bb.1(0x50000000), %bb.2(0x30000000)
-    liveins: $x0, $x1, $x2, $w3
+    liveins: $x0, $x1, $x2, $w3, $s0
+
+    %10:fpr32 = COPY $s0
+    %9:gpr32common = COPY $w3
+    %8:gpr64common = COPY $x2
+    %7:gpr64common = COPY $x1
+    %6:gpr64common = COPY $x0
+    %11:fpr32 = FMOVS0
+    %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1
 
-    %12:gpr32common = COPY $w3
-    %11:gpr64common = COPY $x2
-    %10:gpr64common = COPY $x1
-    %9:gpr64common = COPY $x0
-    %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6)
-    %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %13:fpr32 = FMOVS0
+    %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6)
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %1:fpr32 = PHI %11, %bb.0, %4, %bb.3
+    STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+    %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3
+    %3:fpr32 = PHI %13, %bb.1, %4, %bb.3
+    %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0
+    %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
+    %5:gpr32all = COPY %14
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.3
+
+...
+---
+name:            cant_sink_aliased_store_in_loop
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr32sp, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32all, preferred-register: '' }
+  - { id: 6, class: gpr64common, preferred-register: '' }
+  - { id: 7, class: gpr64common, preferred-register: '' }
+  - { id: 8, class: gpr64common, preferred-register: '' }
+  - { id: 9, class: gpr32common, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+  - { id: 11, class: fpr32, preferred-register: '' }
+  - { id: 12, class: gpr32, preferred-register: '' }
+  - { id: 13, class: fpr32, preferred-register: '' }
+  - { id: 14, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%6' }
+  - { reg: '$x1', virtual-reg: '%7' }
+  - { reg: '$x2', virtual-reg: '%8' }
+  - { reg: '$w3', virtual-reg: '%9' }
+  - { reg: '$s0', virtual-reg: '%10' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: cant_sink_aliased_store_in_loop
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $x2, $w3, $s0
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 10, %bb.1, implicit $nzcv
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.1.for.body.preheader:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.3
+  ; CHECK: bb.2.for.cond.cleanup:
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3
+  ; CHECK:   STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   RET_ReallyLR
+  ; CHECK: bb.3.for.body:
+  ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3
+  ; CHECK:   [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]]
+  ; CHECK:   STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0)
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK:   Bcc 0, %bb.2, implicit $nzcv
+  ; CHECK:   B %bb.3
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $x0, $x1, $x2, $w3, $s0
+
+    %10:fpr32 = COPY $s0
+    %9:gpr32common = COPY $w3
+    %8:gpr64common = COPY $x2
+    %7:gpr64common = COPY $x1
+    %6:gpr64common = COPY $x0
+    %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6)
+    %11:fpr32 = FMOVS0
+    %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv
     Bcc 11, %bb.2, implicit $nzcv
     B %bb.1
 
   bb.1.for.body.preheader:
     successors: %bb.3(0x80000000)
 
-    %16:gpr32sp = ADDWri %13, 42, 0
-    %1:gpr32all = COPY %16
-    %14:gpr32 = MOVi32imm 43
-    STRWui killed %14, %9, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %13:fpr32 = FMOVS0
     B %bb.3
 
   bb.2.for.cond.cleanup:
-    %2:gpr32 = PHI %12, %bb.0, %6, %bb.3
-    STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6)
+    %1:fpr32 = PHI %11, %bb.0, %4, %bb.3
+    STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6)
     RET_ReallyLR
 
   bb.3.for.body:
     successors: %bb.2(0x04000000), %bb.3(0x7c000000)
 
-    %3:gpr32common = PHI %1, %bb.1, %8, %bb.3
-    %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3
-    %5:gpr32 = PHI %12, %bb.1, %6, %bb.3
-    %17:gpr32 = SDIVWr %5, %3
-    %6:gpr32all = COPY %17
-    %18:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv
-    %7:gpr32all = COPY %18
-    %19:gpr32sp = ADDWri %3, 1, 0
-    %8:gpr32all = COPY %19
+    %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3
+    %3:fpr32 = PHI %13, %bb.1, %4, %bb.3
+    %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0
+    STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6)
+    %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
+    %5:gpr32all = COPY %14
     Bcc 0, %bb.2, implicit $nzcv
     B %bb.3