Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -227,6 +227,11 @@
   void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
   bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+  bool IsSafeToMove(MachineInstr &I, MachineBasicBlock *SinkTo);
+  bool AreAliased(MachineInstr &First, MachineInstr &Second,
+                  MachineBasicBlock *From, MachineBasicBlock *To,
+                  DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                  bool &SawStore, bool &HasAliasedStore);
 
   bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                             MachineBasicBlock *MBB,
@@ -352,24 +357,6 @@
   return true;
 }
 
-/// Return true if this machine instruction loads from global offset table or
-/// constant pool.
-static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
-  assert(MI.mayLoad() && "Expected MI that loads!");
-
-  // If we lost memory operands, conservatively assume that the instruction
-  // reads from everything..
-  if (MI.memoperands_empty())
-    return true;
-
-  for (MachineMemOperand *MemOp : MI.memoperands())
-    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
-      if (PSV->isGOT() || PSV->isConstantPool())
-        return true;
-
-  return false;
-}
-
 void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                                             SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
@@ -379,27 +366,27 @@
                            "target\n");
       continue;
     }
+    // If physical registers are used, then this is marked as not loop
+    // invariant. This can be the case if the preheader is the entry block, and
+    // when there are copy instructions of function arguments that are passed
+    // through registers.
     if (!L->isLoopInvariant(MI)) {
      LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
       continue;
     }
-    bool DontMoveAcrossStore = true;
-    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
-      continue;
-    }
-    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
-      continue;
-    }
     if (MI.isConvergent())
       continue;
 
+    // This e.g. skips branch and store instructions.
     const MachineOperand &MO = MI.getOperand(0);
-    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef()) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not define a value.\n");
       continue;
-    if (!MRI->hasOneDef(MO.getReg()))
+    }
+    if (!MRI->hasOneDef(MO.getReg())) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not have 1 def.\n");
       continue;
+    }
 
     LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
     Candidates.push_back(&MI);
@@ -455,6 +442,9 @@
     EverMadeChange = true;
   }
 
+  HasStoreCache.clear();
+  StoreInstrCache.clear();
+
   if (SinkInstsIntoLoop) {
     SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
     for (auto *L : Loops) {
@@ -470,8 +460,13 @@
       // of a def-use chain, if there is any.
       for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
         MachineInstr *I = *It;
+
+        // TODO: This is conservative because we bail as soon as we find one
+        // instruction that cannot be sunk. It would be better to do this per
+        // def-use chain, so that we can try the next chain if one fails.
         if (!SinkIntoLoop(L, *I))
           break;
+
         EverMadeChange = true;
         ++NumLoopSunk;
       }
@@ -1155,29 +1150,10 @@
       }
 
       for (MachineInstr &I : *BB) {
-        // Treat as alias conservatively for a call or an ordered memory
-        // operation.
-        if (I.isCall() || I.hasOrderedMemoryRef()) {
-          for (auto *DomBB : HandledDomBlocks) {
-            if (DomBB != BB && DT->dominates(DomBB, BB))
-              HasStoreCache[std::make_pair(DomBB, To)] = true;
-            else if(DomBB != BB && DT->dominates(BB, DomBB))
-              HasStoreCache[std::make_pair(From, DomBB)] = true;
-          }
-          HasStoreCache[BlockPair] = true;
+        bool Aliased = AreAliased(I, MI, From, To, HandledBlocks, SawStore,
+                                  HasAliasedStore);
+        if (Aliased && (I.isCall() || I.hasOrderedMemoryRef()))
           return true;
-        }
-
-        if (I.mayStore()) {
-          SawStore = true;
-          // We still have chance to sink MI if all stores between are not
-          // aliased to MI.
-          // Cache all store instructions, so that we don't need to go through
-          // all From reachable blocks for next load instruction.
-          if (I.mayAlias(AA, MI, false))
-            HasAliasedStore = true;
-          StoreInstrCache[BlockPair].push_back(&I);
-        }
       }
     }
   }
@@ -1187,6 +1163,79 @@
   return HasAliasedStore;
 }
 
+bool MachineSinking::AreAliased(MachineInstr &First, MachineInstr &Second,
+                                MachineBasicBlock *From, MachineBasicBlock *To,
+                                DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                                bool &SawStore, bool &HasAliasedStore) {
+  MachineBasicBlock *BB = First.getParent();
+  auto BlockPair = std::make_pair(From, To);
+
+  if (First.isCall() || Second.hasOrderedMemoryRef()) {
+    for (auto *DomBB : HandledDomBlocks) {
+      if (DomBB != BB && DT->dominates(DomBB, BB))
+        HasStoreCache[std::make_pair(DomBB, To)] = true;
+      else if(DomBB != BB && DT->dominates(BB, DomBB))
+        HasStoreCache[std::make_pair(From, DomBB)] = true;
+    }
+    HasStoreCache[BlockPair] = true;
+    return true;
+  }
+
+  if (First.mayStore()) {
+    SawStore = true;
+    // We still have chance to sink MI if all stores between are not
+    // aliased to MI.
+    // Cache all store instructions, so that we don't need to go through
+    // all From reachable blocks for next load instruction.
+    if (First.mayAlias(AA, Second, false))
+      HasAliasedStore = true;
+    StoreInstrCache[BlockPair].push_back(&First);
+  }
+
+  // If there is no store at all, cache the result.
+  if (!SawStore)
+    HasStoreCache[BlockPair] = false;
+  return HasAliasedStore;
+}
+
+bool MachineSinking::IsSafeToMove(MachineInstr &I, MachineBasicBlock *SinkTo) {
+  auto End = I.getParent()->instr_end();
+  auto It = I.getIterator();
+  bool SawStore = false;
+  bool HasAliasedStore = false;
+
+  // 1) First, analyse all instructions from the current instruction I to the
+  // end of its block.
+  It++;
+  for (; It != End; ++It) {
+    if (AreAliased(*It, I, I.getParent(), SinkTo, {}, SawStore, HasAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Alias pair found!\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "LoopSink: Not aliased with: " << *It);
+  }
+
+  // 2) Check if we can move I to SinkTo, and see if there are any stores in
+  // between that are aliased.
+  bool DontMoveAcrossStore = hasStoreBetween(I.getParent(), SinkTo, I);
+  LLVM_DEBUG(dbgs() << "LoopSink: Found store in between: "
+                    << DontMoveAcrossStore << "\n");
+  if (!I.isSafeToMove(AA, DontMoveAcrossStore)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
+
+  // 3) Check all instructions in the sink block to see if they alias.
+  for (auto &CurI : *SinkTo) {
+    if (AreAliased(CurI, I, I.getParent(), SinkTo, {}, SawStore, HasAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Alias found in sink block: " << CurI);
+      return false;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not aliased, safe to move!\n");
+  return true;
+}
+
 /// Sink instructions into loops if profitable. This especially tries to prevent
 /// register spills caused by register pressure if there is little to no
 /// overhead moving instructions into loops.
@@ -1209,12 +1258,7 @@
       // FIXME: Come up with a proper cost model that estimates whether sinking
       // the instruction (and thus possibly executing it on every loop
       // iteration) is more expensive than a register.
-      // For now assumes that copies are cheap and thus almost always worth it.
-      if (!MI.isCopy()) {
-        LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
-        CanSink = false;
-        break;
-      }
+
       if (!SinkBlock) {
         SinkBlock = MI.getParent();
         LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
@@ -1243,6 +1287,10 @@
     LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
     return false;
   }
+  if (!IsSafeToMove(I, SinkBlock)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
   LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
   SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
Index: llvm/test/CodeGen/AArch64/loop-sink.mir
===================================================================
--- llvm/test/CodeGen/AArch64/loop-sink.mir
+++ llvm/test/CodeGen/AArch64/loop-sink.mir
@@ -151,7 +151,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @sink_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @sink_load_add_chain(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -204,7 +204,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
-  define dso_local void @aliased_store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @aliased_store_imm_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -231,6 +231,32 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
+  define dso_local void @aliased_store_after_load(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
+  entry:
+    %cmp12 = icmp sgt i32 %n, 0
+    br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    %0 = load i32, i32* %read, align 4, !tbaa !6
+    store i32 %n, i32* %read, align 4, !tbaa !6
+    %1 = add i32 %0, 42
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
+    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+    %div = sdiv i32 %sum.013, %lsr.iv1
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %lsr.iv.next2 = add i32 %lsr.iv1, 1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+  }
   declare i32 @use(i32)
   declare void @_Z6assignPj(i32*)
 
@@ -336,12 +362,11 @@
   ; CHECK: [[COPY5:%[0-9]+]]:gpr64all = COPY [[ADDXri3]]
   ; CHECK: [[ADDXri4:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 20, 0
   ; CHECK: [[COPY6:%[0-9]+]]:gpr64all = COPY [[ADDXri4]]
-  ; CHECK: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0
-  ; CHECK: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]]
-  ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
   ; CHECK: bb.1..backedge:
   ; CHECK: successors: %bb.9(0x09249249), %bb.2(0x76db6db7)
-  ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY7]], %bb.0, %7, %bb.9
+  ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI %5, %bb.0, %7, %bb.9
+  ; CHECK: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0
+  ; CHECK: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]]
   ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load 1 from %ir.lsr.iv)
   ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[LDRBBui]], %subreg.sub_32
   ; CHECK: [[COPY8:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
@@ -349,6 +374,7 @@
   ; CHECK: Bcc 8, %bb.9, implicit $nzcv
   ; CHECK: bb.2..backedge:
   ; CHECK: successors: %bb.3(0x13b13b14), %bb.9(0x09d89d8a), %bb.4(0x13b13b14), %bb.5(0x13b13b14), %bb.6(0x13b13b14), %bb.7(0x13b13b14), %bb.8(0x13b13b14)
+  ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
   ; CHECK: early-clobber %21:gpr64, early-clobber %22:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
   ; CHECK: BR killed %21
   ; CHECK: bb.3 (%ir-block.8):
@@ -931,8 +957,6 @@
   ; CHECK: B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK: successors: %bb.3(0x80000000)
-  ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
-  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
@@ -942,6 +966,8 @@
   ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
   ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
   ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+  ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
   ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
@@ -983,7 +1009,7 @@
 ...
 ---
-name: sink_add
+name: sink_load_add_chain
 alignment: 16
 exposesReturnsTwice: false
 legalized: false
@@ -1041,7 +1067,7 @@
 constants: []
 machineFunctionInfo: {}
 body: |
-  ; CHECK-LABEL: name: sink_add
+  ; CHECK-LABEL: name: sink_load_add_chain
   ; CHECK: bb.0.entry:
   ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK: liveins: $x0, $x1, $w2
@@ -1053,9 +1079,6 @@
   ; CHECK: B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK: successors: %bb.3(0x80000000)
-  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
-  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
-  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK: B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
@@ -1063,9 +1086,12 @@
   ; CHECK: RET_ReallyLR
   ; CHECK: bb.3.for.body:
   ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
-  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.1, %8, %bb.3
+  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
   ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
   ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
   ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
@@ -1256,7 +1282,7 @@
 ...
 ---
-name: aliased_store_after_add
+name: aliased_store_imm_after_add
 alignment: 16
 exposesReturnsTwice: false
 legalized: false
@@ -1317,7 +1343,7 @@
 constants: []
 machineFunctionInfo: {}
 body: |
-  ; CHECK-LABEL: name: aliased_store_after_add
+  ; CHECK-LABEL: name: aliased_store_imm_after_add
   ; CHECK: bb.0.entry:
   ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK: liveins: $x0, $x1, $x2, $w3
@@ -1396,4 +1422,138 @@
     B %bb.3
 
 ...
+---
+name: aliased_store_after_load
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gpr32sp, preferred-register: '' }
+  - { id: 1, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32, preferred-register: '' }
+  - { id: 3, class: gpr32common, preferred-register: '' }
+  - { id: 4, class: gpr32sp, preferred-register: '' }
+  - { id: 5, class: gpr32, preferred-register: '' }
+  - { id: 6, class: gpr32all, preferred-register: '' }
+  - { id: 7, class: gpr32all, preferred-register: '' }
+  - { id: 8, class: gpr32all, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64common, preferred-register: '' }
+  - { id: 11, class: gpr64, preferred-register: '' }
+  - { id: 12, class: gpr32common, preferred-register: '' }
+  - { id: 13, class: gpr32common, preferred-register: '' }
+  - { id: 14, class: gpr32, preferred-register: '' }
+  - { id: 15, class: gpr32sp, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
+  - { id: 17, class: gpr32, preferred-register: '' }
+  - { id: 18, class: gpr32sp, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+  - { reg: '$w3', virtual-reg: '%12' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 1
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: aliased_store_after_load
+  ; CHECK: bb.0.entry:
+  ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; CHECK: liveins: $x0, $x1, $w3
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK: Bcc 11, %bb.2, implicit $nzcv
+  ; CHECK: B %bb.1
+  ; CHECK: bb.1.for.body.preheader:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK: STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK: B %bb.3
+  ; CHECK: bb.2.for.cond.cleanup:
+  ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
+  ; CHECK: STRWui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK: RET_ReallyLR
+  ; CHECK: bb.3.for.body:
+  ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
+  ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
+  ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
+  ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+  ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK: [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0
+  ; CHECK: [[COPY6:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK: Bcc 0, %bb.2, implicit $nzcv
+  ; CHECK: B %bb.3
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $x0, $x1, $w3
+
+    %12:gpr32common = COPY $w3
+    %10:gpr64common = COPY $x1
+    %9:gpr64common = COPY $x0
+    %14:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRWui %12, %9, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %15:gpr32sp = ADDWri %13, 42, 0
+    %1:gpr32all = COPY %15
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %2:gpr32 = PHI %12, %bb.0, %6, %bb.3
+    STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+    %3:gpr32common = PHI %1, %bb.1, %8, %bb.3
+    %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3
+    %5:gpr32 = PHI %12, %bb.1, %6, %bb.3
+    %16:gpr32 = SDIVWr %5, %3
+    %6:gpr32all = COPY %16
+    %17:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv
+    %7:gpr32all = COPY %17
+    %18:gpr32sp = ADDWri %3, 1, 0
+    %8:gpr32all = COPY %18
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.3
+
+...