Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -109,6 +109,14 @@
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
 
+// Opt-in escape hatch for scalar promotion: when set, promotion may insert a
+// store on loop exit for a location that is only conditionally stored to
+// inside the loop. This adds stores on paths that originally had none, which
+// can introduce data races, so it is disabled by default.
+static cl::opt<bool> AllowDataRaces("allow-data-races", cl::Hidden,
+                                    cl::init(false),
+                                    cl::desc("Allow data races in LICM pass"));
+
 static cl::opt<uint32_t> MaxNumUsesTraversed(
     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
     cl::desc("Max num uses visited for identifying load "
@@ -2111,7 +2119,7 @@
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
+    if (IsKnownThreadLocalObject || AllowDataRaces)
       SafeToInsertStore = true;
     else {
       Value *Object = getUnderlyingObject(SomePtr);
Index: llvm/test/Transforms/LICM/promote-sink-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/promote-sink-store.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -allow-data-races -S %s | FileCheck %s
+; With -allow-data-races, the conditional store to @v inside the loop is
+; promoted to a scalar and the store is sunk to the loop exit block.
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+@v = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nosync nounwind uwtable
+define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP31:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1
+; CHECK-NEXT:    br label [[TMP9:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i32 [ [[V_PROMOTED]], [[TMP5]] ], [ [[TMP25:%.*]], [[TMP24:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP27:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP17:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP26:%.*]], [[TMP24]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+; CHECK-NEXT:    [[TMP17]] = add nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP29:%.*]]
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 0
+; CHECK-NEXT:    br i1 [[TMP21]], label [[TMP24]], label [[TMP22:%.*]]
+; CHECK:       22:
+; CHECK-NEXT:    [[TMP23:%.*]] = add nsw i32 [[TMP13]], 1
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    [[TMP25]] = phi i32 [ [[TMP10]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ]
+; CHECK-NEXT:    [[TMP26]] = phi i32 [ [[TMP13]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ]
+; CHECK-NEXT:    [[TMP27]] = add nuw nsw i64 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[TMP27]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[TMP29]], label [[TMP9]]
+; CHECK:       29:
+; CHECK-NEXT:    [[TMP30:%.*]] = phi i32 [ [[TMP25]], [[TMP24]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[TMP24]] ], [ [[TMP17]], [[TMP9]] ]
+; CHECK-NEXT:    store i32 [[TMP30]], ptr @v, align 1
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP31]]
+; CHECK:       31:
+; CHECK-NEXT:    ret void
+;
+  %4 = icmp sgt i32 %2, 0
+  br i1 %4, label %5, label %28
+
+5:                                                ; preds = %3
+  %6 = load i32, ptr @v, align 4
+  %7 = load i32, ptr @u, align 4
+  %8 = zext i32 %2 to i64
+  br label %9
+
+9:                                                ; preds = %5, %23
+  %10 = phi i64 [ 0, %5 ], [ %25, %23 ]
+  %11 = phi i32 [ %7, %5 ], [ %16, %23 ]
+  %12 = phi i32 [ %6, %5 ], [ %24, %23 ]
+  %13 = getelementptr inbounds i32, ptr %0, i64 %10
+  %14 = load i32, ptr %13, align 4
+  %15 = icmp eq i32 %14, 0
+  %16 = add nsw i32 %11, 1
+  br i1 %15, label %17, label %27
+
+17:                                               ; preds = %9
+  %18 = getelementptr inbounds i32, ptr %1, i64 %10
+  %19 = load i32, ptr %18, align 4
+  %20 = icmp eq i32 %19, 0
+  br i1 %20, label %23, label %21
+
+21:                                               ; preds = %17
+  %22 = add nsw i32 %12, 1
+  store i32 %22, ptr @v, align 4
+  br label %23
+
+23:                                               ; preds = %17, %21
+  %24 = phi i32 [ %12, %17 ], [ %22, %21 ]
+  %25 = add nuw nsw i64 %10, 1
+  %26 = icmp eq i64 %25, %8
+  br i1 %26, label %27, label %9
+
+27:                                               ; preds = %9, %23
+  store i32 %16, ptr @u, align 4
+  br label %28
+
+28:                                               ; preds = %27, %3
+  ret void
+}
Index: llvm/test/Transforms/LICM/without-allow-data-race.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/without-allow-data-race.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -S %s | FileCheck %s
+; Without -allow-data-races, the conditional store to @v stays inside the
+; loop; only the unconditional store to @u is present in the exit block.
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+@v = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nosync nounwind uwtable
+define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP28:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    br label [[TMP9:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP25:%.*]], [[TMP23:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP16:%.*]], [[TMP23]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP24:%.*]], [[TMP23]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
+; CHECK-NEXT:    [[TMP16]] = add nsw i32 [[TMP11]], 1
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP17:%.*]], label [[TMP27:%.*]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 0
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP23]], label [[TMP21:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr @v, align 4
+; CHECK-NEXT:    br label [[TMP23]]
+; CHECK:       23:
+; CHECK-NEXT:    [[TMP24]] = phi i32 [ [[TMP12]], [[TMP17]] ], [ [[TMP22]], [[TMP21]] ]
+; CHECK-NEXT:    [[TMP25]] = add nuw nsw i64 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[TMP25]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[TMP27]], label [[TMP9]]
+; CHECK:       27:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP16]], [[TMP23]] ], [ [[TMP16]], [[TMP9]] ]
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP28]]
+; CHECK:       28:
+; CHECK-NEXT:    ret void
+;
+  %4 = icmp sgt i32 %2, 0
+  br i1 %4, label %5, label %28
+
+5:                                                ; preds = %3
+  %6 = load i32, ptr @v, align 4
+  %7 = load i32, ptr @u, align 4
+  %8 = zext i32 %2 to i64
+  br label %9
+
+9:                                                ; preds = %5, %23
+  %10 = phi i64 [ 0, %5 ], [ %25, %23 ]
+  %11 = phi i32 [ %7, %5 ], [ %16, %23 ]
+  %12 = phi i32 [ %6, %5 ], [ %24, %23 ]
+  %13 = getelementptr inbounds i32, ptr %0, i64 %10
+  %14 = load i32, ptr %13, align 4
+  %15 = icmp eq i32 %14, 0
+  %16 = add nsw i32 %11, 1
+  br i1 %15, label %17, label %27
+
+17:                                               ; preds = %9
+  %18 = getelementptr inbounds i32, ptr %1, i64 %10
+  %19 = load i32, ptr %18, align 4
+  %20 = icmp eq i32 %19, 0
+  br i1 %20, label %23, label %21
+
+21:                                               ; preds = %17
+  %22 = add nsw i32 %12, 1
+  store i32 %22, ptr @v, align 4
+  br label %23
+
+23:                                               ; preds = %17, %21
+  %24 = phi i32 [ %12, %17 ], [ %22, %21 ]
+  %25 = add nuw nsw i64 %10, 1
+  %26 = icmp eq i64 %25, %8
+  br i1 %26, label %27, label %9
+
+27:                                               ; preds = %9, %23
+  store i32 %16, ptr @u, align 4
+  br label %28
+
+28:                                               ; preds = %27, %3
+  ret void
+}