Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -599,6 +599,17 @@
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
 
+    // Don't sink non-invariant loads into loops because it can introduce new
+    // data races. For example:
+    // b = *p;
+    // for (int i = 0; i < N; i++)
+    //   a[i] = b;
+    // Assuming `a` is a thread local value, it should always contain a single
+    // value. If it were to contain different values at different indices, that
+    // would be a miscompile.
+    if (!SafetyInfo)
+      return false;
+
     // Don't hoist loads which have may-aliased stores in loop.
     uint64_t Size = 0;
     if (LI->getType()->isSized())
Index: test/Transforms/LICM/loopsink.ll
===================================================================
--- test/Transforms/LICM/loopsink.ll
+++ test/Transforms/LICM/loopsink.ll
@@ -1,7 +1,8 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
-; RUN: opt -S -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 @g = global i32 0, align 4
+@g_const = constant i32 1, align 4
 
 ;     b1
 ;    /  \
@@ -16,18 +17,21 @@
 ; b2: 15
 ; b3: 7
 ; b4: 7
-; Sink load to b2
+; Sink add to b2. Don't sink load because it's illegal to sink non-invariant
+; loads into loops.
 ; CHECK: t1
 ; CHECK: .b2:
-; CHECK: load i32, i32* @g
+; CHECK: add i32 %global, 1
+; CHECK-NOT: load i32, i32* @g
 ; CHECK: .b3:
-; CHECK-NOT:  load i32, i32* @g
+; CHECK-NOT:  add i32 %global, 1
 define i32 @t1(i32, i32) #0 !prof !0 {
   %3 = icmp eq i32 %1, 0
   br i1 %3, label %.exit, label %.preheader
 
 .preheader:
-  %invariant = load i32, i32* @g
+  %global = load i32, i32* @g
+  %invariant = add i32 %global, 1
   br label %.b1
 
 .b1:
@@ -79,22 +83,26 @@
 ; b1: 16016
 ; b3: 8
 ; b6: 8
-; Sink load to b3 and b6
+; Sink add to b3 and b6. Don't sink load because it's illegal to sink
+; non-invariant loads into loops.
 ; CHECK: t2
 ; CHECK: .preheader:
-; CHECK-NOT: load i32, i32* @g
+; CHECK-NOT: add i32 %global, 1
 ; CHECK: .b3:
-; CHECK: load i32, i32* @g
+; CHECK-NOT: load i32, i32* @g
+; CHECK: add i32 %global, 1
 ; CHECK: .b4:
 ; CHECK: .b6:
-; CHECK: load i32, i32* @g
+; CHECK-NOT: load i32, i32* @g
+; CHECK: add i32 %global, 1
 ; CHECK: .b7:
 define i32 @t2(i32, i32) #0 !prof !0 {
   %3 = icmp eq i32 %1, 0
   br i1 %3, label %.exit, label %.preheader
 
 .preheader:
-  %invariant = load i32, i32* @g
+  %global = load i32, i32* @g
+  %invariant = add i32 %global, 1
   br label %.b1
 
 .b1:
@@ -145,18 +153,21 @@
 ; preheader: 500
 ; b3: 8
 ; b5: 16008
-; Do not sink load from preheader.
+; Do not sink add from preheader.
 ; CHECK: t3
 ; CHECK: .preheader:
 ; CHECK: load i32, i32* @g
+; CHECK: add i32 %global, 1
 ; CHECK: .b1:
 ; CHECK-NOT: load i32, i32* @g
+; CHECK-NOT: add i32 %global, 1
 define i32 @t3(i32, i32) #0 !prof !0 {
   %3 = icmp eq i32 %1, 0
   br i1 %3, label %.exit, label %.preheader
 
 .preheader:
-  %invariant = load i32, i32* @g
+  %global = load i32, i32* @g
+  %invariant = add i32 %global, 1
   br label %.b1
 
 .b1:
@@ -195,16 +206,20 @@
   ret i32 10
 }
 
-; For single-BB loop with <=1 avg trip count, sink load to b1
+; For single-BB loop with <=1 avg trip count, sink add to b1.
+; Don't sink load because it's illegal to sink non-invariant loads into loops.
 ; CHECK: t4
 ; CHECK: .preheader:
-; CHECK-not: load i32, i32* @g
-; CHECK: .b1:
 ; CHECK: load i32, i32* @g
+; CHECK-NOT: add i32 %global, 1
+; CHECK: .b1:
+; CHECK-NOT: load i32, i32* @g
+; CHECK: add i32 %global, 1
 ; CHECK: .exit:
 define i32 @t4(i32, i32) #0 !prof !0 {
 .preheader:
-  %invariant = load i32, i32* @g
+  %global = load i32, i32* @g
+  %invariant = add i32 %global, 1
   br label %.b1
 
 .b1:
@@ -230,18 +245,18 @@
 ; b2: 15
 ; b3: 7
 ; b4: 7
-; There is alias store in loop, do not sink load
+; Sink load from constant memory.
 ; CHECK: t5
 ; CHECK: .preheader:
-; CHECK: load i32, i32* @g
-; CHECK: .b1:
-; CHECK-NOT: load i32, i32* @g
+; CHECK-NOT: load i32, i32* @g_const
+; CHECK: .b2:
+; CHECK: load i32, i32* @g_const
 define i32 @t5(i32, i32*) #0 !prof !0 {
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %.exit, label %.preheader
 
 .preheader:
-  %invariant = load i32, i32* @g
+  %invariant = load i32, i32* @g_const
   br label %.b1
 
 .b1:
Index: test/Transforms/LICM/sink.ll
===================================================================
--- test/Transforms/LICM/sink.ll
+++ test/Transforms/LICM/sink.ll
@@ -13,7 +13,17 @@
 ;   return x;
 ; }
 ;
-; Load of global value g should not be hoisted to preheader.
+; Computations depending on the load of global value @g should not be hoisted to
+; preheader. Though load of @g will be hoisted by LICM and should not be sunk
+; back by LoopSink because sinking of non-invariant loads into loops is illegal:
+; it can introduce new data races. For example:
+; b = *p;
+; for (int i = 0; i < N; i++)
+;   a[i] = b;
+; Assuming `a` is a thread local value, it should always contain a single
+; value. If it were to contain different values at different indices, that
+; would be a miscompile.
+
 
 @g = global i32 0, align 4
 
@@ -26,6 +36,7 @@
 
 ; CHECK-LICM: .lr.ph.preheader:
 ; CHECK-LICM: load i32, i32* @g
+; CHECK-LICM: add nsw i32 %glob, 1
 ; CHECK-LICM: br label %.lr.ph
 
 .lr.ph:
@@ -35,13 +46,15 @@
   br i1 %4, label %.then, label %.combine, !prof !1
 
 .then:
-  %5 = load i32, i32* @g, align 4
+  %glob = load i32, i32* @g, align 4
+  %5 = add nsw i32 %glob, 1
   %6 = add nsw i32 %5, %.012
   %7 = mul nsw i32 %6, %5
   br label %.combine
 
 ; CHECK-SINK: .then:
-; CHECK-SINK: load i32, i32* @g
+; CHECK-SINK-NOT: load i32, i32* @g
+; CHECK-SINK: add nsw i32 %glob, 1
 ; CHECK-SINK: br label %.combine
 
 .combine: