Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -370,6 +370,8 @@
 
   unsigned getAssumedAddrSpace(const Value *V) const;
 
+  bool isSingleThreaded() const; // True if the thread model is single.
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const;
 
@@ -1542,6 +1544,7 @@
   virtual bool
   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
   virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+  virtual bool isSingleThreaded() const = 0;
   virtual std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const = 0;
   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
@@ -1917,6 +1920,10 @@
     return Impl.getAssumedAddrSpace(V);
   }
 
+  bool isSingleThreaded() const override {
+    return Impl.isSingleThreaded();
+  }
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const override {
     return Impl.getPredicatedAddrSpace(V);
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -108,6 +108,8 @@
 
   unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  bool isSingleThreaded() const { return false; } // Conservative default.
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const {
     return std::make_pair(nullptr, -1);
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -47,6 +47,7 @@
 #include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -287,6 +288,11 @@
     return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
   }
 
+  bool isSingleThreaded() const {
+    const TargetOptions &Opts = getTLI()->getTargetMachine().Options;
+    return Opts.ThreadModel == ThreadModel::Single;
+  }
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const {
     return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -208,11 +208,12 @@
 /// \p AllowSpeculation is whether values should be hoisted even if they are not
 /// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool promoteLoopAccessesToScalars(
-    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
-    SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
-    PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
-    Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *, bool AllowSpeculation);
+    AAResults *, const SmallSetVector<Value *, 8> &,
+    SmallVectorImpl<BasicBlock *> &, SmallVectorImpl<Instruction *> &,
+    SmallVectorImpl<MemoryAccess *> &, PredIteratorCache &, LoopInfo *,
+    DominatorTree *, const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+    MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
+    bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -273,6 +273,10 @@
   return TTIImpl->getAssumedAddrSpace(V);
 }
 
+bool TargetTransformInfo::isSingleThreaded() const {
+  return TTIImpl->isSingleThreaded();
+}
+
 std::pair<const Value *, unsigned>
 TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
   return TTIImpl->getPredicatedAddrSpace(V);
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -75,6 +75,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -109,6 +110,11 @@
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
 
+// Overrides the TTI query: behave as if the target were single-threaded.
+static cl::opt<bool> ThreadModelSingle(
+    "licm-force-thread-model-single", cl::Hidden, cl::init(false),
+    cl::desc("Force thread model single in LICM pass"));
+
 static cl::opt<uint32_t> MaxNumUsesTraversed(
     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
     cl::desc("Max num uses visited for identifying load "
@@ -486,8 +492,9 @@
         for (const SmallSetVector<Value *, 8> &PointerMustAliases :
              collectPromotionCandidates(MSSA, AA, L)) {
           LocalPromoted |= promoteLoopAccessesToScalars(
-              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
-              DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
+              AA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
+              LI, DT, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
+              LicmAllowSpeculation);
         }
         Promoted |= LocalPromoted;
       } while (LocalPromoted);
@@ -1905,13 +1912,14 @@
 /// loop invariant.
 ///
 bool llvm::promoteLoopAccessesToScalars(
-    const SmallSetVector<Value *, 8> &PointerMustAliases,
+    AAResults *AA, const SmallSetVector<Value *, 8> &PointerMustAliases,
     SmallVectorImpl<BasicBlock *> &ExitBlocks,
     SmallVectorImpl<Instruction *> &InsertPts,
     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
-    Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
-    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
+    TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
+    ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
+    bool AllowSpeculation) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          SafetyInfo != nullptr &&
@@ -1961,6 +1969,7 @@
   bool SafeToInsertStore = false;
   bool StoreIsGuanteedToExecute = false;
   bool FoundLoadToPromote = false;
+  bool PointToConstantMemory = false;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -2070,6 +2079,8 @@
               Store->getPointerOperand(), Store->getValueOperand()->getType(),
               Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
         }
+        if (AA->pointsToConstantMemory(Store->getPointerOperand()))
+          PointToConstantMemory = true;
       } else
         return false; // Not a load or store.
 
@@ -2112,7 +2123,9 @@
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
+    if (IsKnownThreadLocalObject ||
+        ((ThreadModelSingle || (TTI && TTI->isSingleThreaded())) &&
+         !PointToConstantMemory))
       SafeToInsertStore = true;
     else {
       Value *Object = getUnderlyingObject(SomePtr);
Index: llvm/test/Transforms/LICM/promote-sink-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/promote-sink-store.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+@v = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nosync nounwind uwtable
+define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP31:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1
+; CHECK-NEXT:    br label [[TMP9:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i32 [ [[V_PROMOTED]], [[TMP5]] ], [ [[TMP25:%.*]], [[TMP24:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP27:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP17:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP26:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT:    [[TMP17]] = add nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP29:%.*]]
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 0
+; CHECK-NEXT:    br i1 [[TMP21]], label [[TMP24]], label [[TMP22:%.*]]
+; CHECK:       22:
+; CHECK-NEXT:    [[TMP23:%.*]] = add nsw i32 [[TMP13]], 1
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    [[TMP25]] = phi i32 [ [[TMP10]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ]
+; CHECK-NEXT:    [[TMP26]] = phi i32 [ [[TMP13]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ]
+; CHECK-NEXT:    [[TMP27]] = add nuw nsw i64 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[TMP27]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[TMP29]], label [[TMP9]]
+; CHECK:       29:
+; CHECK-NEXT:    [[TMP30:%.*]] = phi i32 [ [[TMP25]], [[TMP24]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[TMP24]] ], [ [[TMP17]], [[TMP9]] ]
+; CHECK-NEXT:    store i32 [[TMP30]], ptr @v, align 1
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP31]]
+; CHECK:       31:
+; CHECK-NEXT:    ret void
+;
+  %4 = icmp sgt i32 %2, 0
+  br i1 %4, label %5, label %28
+
+5:                                                ; preds = %3
+  %6 = load i32, ptr @v, align 4
+  %7 = load i32, ptr @u, align 4
+  %8 = zext i32 %2 to i64
+  br label %9
+
+9:                                                ; preds = %5, %23
+  %10 = phi i64 [ 0, %5 ], [ %25, %23 ]
+  %11 = phi i32 [ %7, %5 ], [ %16, %23 ]
+  %12 = phi i32 [ %6, %5 ], [ %24, %23 ]
+  %13 = getelementptr inbounds i32, ptr %0, i64 %10
+  %14 = load i32, ptr %13, align 4
+  %15 = icmp eq i32 %14, 0
+  %16 = add nsw i32 %11, 1
+  br i1 %15, label %17, label %27
+
+17:                                               ; preds = %9
+  %18 = getelementptr inbounds i32, ptr %1, i64 %10
+  %19 = load i32, ptr %18, align 4
+  %20 = icmp eq i32 %19, 0
+  br i1 %20, label %23, label %21
+
+21:                                               ; preds = %17
+  %22 = add nsw i32 %12, 1
+  store i32 %22, ptr @v, align 4
+  br label %23
+
+23:                                               ; preds = %17, %21
+  %24 = phi i32 [ %12, %17 ], [ %22, %21 ]
+  %25 = add nuw nsw i64 %10, 1
+  %26 = icmp eq i64 %25, %8
+  br i1 %26, label %27, label %9
+
+27:                                               ; preds = %9, %23
+  store i32 %16, ptr @u, align 4
+  br label %28
+
+28:                                               ; preds = %27, %3
+  ret void
+}
Index: llvm/test/Transforms/LICM/without-allow-data-race.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/without-allow-data-race.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -S %s | FileCheck %s
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+@v = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nosync nounwind uwtable
+define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP28:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    br label [[TMP9:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP25:%.*]], [[TMP23:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP16:%.*]], [[TMP23]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP24:%.*]], [[TMP23]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
+; CHECK-NEXT:    [[TMP16]] = add nsw i32 [[TMP11]], 1
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP17:%.*]], label [[TMP27:%.*]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 0
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP23]], label [[TMP21:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr @v, align 4
+; CHECK-NEXT:    br label [[TMP23]]
+; CHECK:       23:
+; CHECK-NEXT:    [[TMP24]] = phi i32 [ [[TMP12]], [[TMP17]] ], [ [[TMP22]], [[TMP21]] ]
+; CHECK-NEXT:    [[TMP25]] = add nuw nsw i64 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[TMP25]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[TMP27]], label [[TMP9]]
+; CHECK:       27:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP16]], [[TMP23]] ], [ [[TMP16]], [[TMP9]] ]
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP28]]
+; CHECK:       28:
+; CHECK-NEXT:    ret void
+;
+  %4 = icmp sgt i32 %2, 0
+  br i1 %4, label %5, label %28
+
+5:                                                ; preds = %3
+  %6 = load i32, ptr @v, align 4
+  %7 = load i32, ptr @u, align 4
+  %8 = zext i32 %2 to i64
+  br label %9
+
+9:                                                ; preds = %5, %23
+  %10 = phi i64 [ 0, %5 ], [ %25, %23 ]
+  %11 = phi i32 [ %7, %5 ], [ %16, %23 ]
+  %12 = phi i32 [ %6, %5 ], [ %24, %23 ]
+  %13 = getelementptr inbounds i32, ptr %0, i64 %10
+  %14 = load i32, ptr %13, align 4
+  %15 = icmp eq i32 %14, 0
+  %16 = add nsw i32 %11, 1
+  br i1 %15, label %17, label %27
+
+17:                                               ; preds = %9
+  %18 = getelementptr inbounds i32, ptr %1, i64 %10
+  %19 = load i32, ptr %18, align 4
+  %20 = icmp eq i32 %19, 0
+  br i1 %20, label %23, label %21
+
+21:                                               ; preds = %17
+  %22 = add nsw i32 %12, 1
+  store i32 %22, ptr @v, align 4
+  br label %23
+
+23:                                               ; preds = %17, %21
+  %24 = phi i32 [ %12, %17 ], [ %22, %21 ]
+  %25 = add nuw nsw i64 %10, 1
+  %26 = icmp eq i64 %25, %8
+  br i1 %26, label %27, label %9
+
+27:                                               ; preds = %9, %23
+  store i32 %16, ptr @u, align 4
+  br label %28
+
+28:                                               ; preds = %27, %3
+  ret void
+}