Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -28,6 +28,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InstructionCost.h"
 #include <functional>
 #include <utility>
@@ -67,6 +68,8 @@
 struct KnownBits;
 template <typename T> class Optional;
 
+extern cl::opt<bool> AllowDataRaces; // Defined in Transforms/Scalar/LICM.cpp.
+
 /// Information about a load/store intrinsic defined by the target.
 struct MemIntrinsicInfo {
   /// This is the pointer that the intrinsic is loading from or storing to.
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -88,6 +88,12 @@
 namespace llvm {
 class BlockFrequencyInfo;
 class LPMUpdater;
+/// When set, LICM treats inserting a store on a path that did not originally
+/// have one as safe even if the object is not known to be thread-local. This
+/// can introduce data races, so the option defaults to false and is hidden
+/// from -help.
+cl::opt<bool> AllowDataRaces("allow-data-races", cl::Hidden, cl::init(false),
+                             cl::desc("Allow data races in LICM pass"));
 } // namespace llvm
 
 #define DEBUG_TYPE "licm"
@@ -2111,7 +2117,7 @@
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
+    if (IsKnownThreadLocalObject || llvm::AllowDataRaces)
       SafeToInsertStore = true;
     else {
       Value *Object = getUnderlyingObject(SomePtr);
Index: llvm/test/Transforms/LICM/reg-promote.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/reg-promote.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -allow-data-races -S %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux"
+
+@u = dso_local global i32 0, align 4 ; incremented on both paths out of block 15
+@v = dso_local global i32 0, align 4 ; incremented only in block 34
+@restrict = dso_local global i32 0, align 4 ; loaded before the loop; the results are unused
+@i = dso_local global i32 0, align 4 ; loop index, kept in memory
+
+; Loop indexed by global @i with an early exit; it stores to @u, @v and @i.
+define dso_local void @_Z1fPiS_i(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 {
+; CHECK-LABEL: @_Z1fPiS_i(
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @restrict, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr @restrict, align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT:    store i32 0, ptr @i, align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP5]], align 8
+; CHECK-NEXT:    br label [[TMP14:%.*]]
+; CHECK:       14:
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP17:%.*]], label [[DOTLOOPEXIT:%.*]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; CHECK-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP26:%.*]]
+; CHECK:       23:
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = add nsw i32 [[TMP24]], 1
+; CHECK-NEXT:    store i32 [[TMP25]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP41:%.*]]
+; CHECK:       26:
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr @u, align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = sext i32 [[TMP29]] to i64
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP30]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+; CHECK-NEXT:    br i1 [[TMP33]], label [[TMP34:%.*]], label [[TMP37:%.*]]
+; CHECK:       34:
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1
+; CHECK-NEXT:    store i32 [[TMP36]], ptr @v, align 4
+; CHECK-NEXT:    br label [[TMP37]]
+; CHECK:       37:
+; CHECK-NEXT:    br label [[TMP38:%.*]]
+; CHECK:       38:
+; CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP40:%.*]] = add nsw i32 [[TMP39]], 1
+; CHECK-NEXT:    store i32 [[TMP40]], ptr @i, align 4
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       .loopexit:
+; CHECK-NEXT:    br label [[TMP41]]
+; CHECK:       41:
+; CHECK-NEXT:    ret void
+;
+  %4 = alloca ptr, align 8 ; stack slot for %0
+  %5 = alloca ptr, align 8 ; stack slot for %1
+  %6 = alloca i32, align 4 ; stack slot for %2
+  store ptr %0, ptr %4, align 8
+  store ptr %1, ptr %5, align 8
+  store i32 %2, ptr %6, align 4
+  %7 = load i32, ptr @restrict, align 4
+  %8 = zext i32 %7 to i64 ; unused
+  %9 = load i32, ptr @restrict, align 4
+  %10 = zext i32 %9 to i64 ; unused
+  store i32 0, ptr @i, align 4
+  br label %11
+
+11:                                               ; preds = %38, %3
+  %12 = load i32, ptr @i, align 4
+  %13 = load i32, ptr %6, align 4 ; loop bound (%2), reloaded on every iteration
+  %14 = icmp slt i32 %12, %13
+  br i1 %14, label %15, label %41 ; leave the loop once @i is not below the bound
+
+15:                                               ; preds = %11
+  %16 = load ptr, ptr %4, align 8
+  %17 = load i32, ptr @i, align 4
+  %18 = sext i32 %17 to i64
+  %19 = getelementptr inbounds i32, ptr %16, i64 %18
+  %20 = load i32, ptr %19, align 4
+  %21 = icmp ne i32 %20, 0
+  br i1 %21, label %22, label %25 ; a nonzero element takes the early-exit path
+
+22:                                               ; preds = %15
+  %23 = load i32, ptr @u, align 4
+  %24 = add nsw i32 %23, 1
+  store i32 %24, ptr @u, align 4
+  br label %41 ; leaves the loop
+
+25:                                               ; preds = %15
+  %26 = load i32, ptr @u, align 4
+  %27 = add nsw i32 %26, 1
+  store i32 %27, ptr @u, align 4 ; NOTE(review): still inside the loop in the expected output; confirm the new flag has an effect here
+  %28 = load ptr, ptr %5, align 8
+  %29 = load i32, ptr @i, align 4
+  %30 = sext i32 %29 to i64
+  %31 = getelementptr inbounds i32, ptr %28, i64 %30
+  %32 = load i32, ptr %31, align 4
+  %33 = icmp ne i32 %32, 0
+  br i1 %33, label %34, label %37
+
+34:                                               ; preds = %25
+  %35 = load i32, ptr @v, align 4
+  %36 = add nsw i32 %35, 1
+  store i32 %36, ptr @v, align 4 ; conditional store: only reached when %32 is nonzero
+  br label %37
+
+37:                                               ; preds = %34, %25
+  br label %38
+
+38:                                               ; preds = %37
+  %39 = load i32, ptr @i, align 4
+  %40 = add nsw i32 %39, 1
+  store i32 %40, ptr @i, align 4 ; advance the in-memory loop index
+  br label %11
+
+41:                                               ; preds = %22, %11
+  ret void
+}