Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -109,6 +109,15 @@
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
 
+// When set, LICM treats every promoted location as safe to store to on loop
+// exit, even on paths that originally had no store. That can introduce data
+// races the source program did not have, so the option is off by default.
+// NOTE(review): the flag also disables use renaming in MSSAU.insertDef() for
+// the sunk stores; confirm that coupling is intended.
+static cl::opt<bool> AllowDataRaces("allow-data-races", cl::Hidden,
+                                    cl::init(false),
+                                    cl::desc("Allow data races in LICM pass"));
+
 static cl::opt<uint32_t> MaxNumUsesTraversed(
     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
     cl::desc("Max num uses visited for identifying load "
@@ -1851,7 +1860,8 @@
             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
       }
       MSSAInsertPts[i] = NewMemAcc;
-      MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
+      MSSAU.insertDef(cast<MemoryDef>(NewMemAcc),
+                      /*RenameUses=*/!AllowDataRaces);
       // FIXME: true for safety, false may still be correct.
     }
   }
@@ -2111,7 +2121,7 @@
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
+    if (IsKnownThreadLocalObject || AllowDataRaces)
       SafeToInsertStore = true;
     else {
       Value *Object = getUnderlyingObject(SomePtr);
Index: llvm/test/Transforms/LICM/reg-promote.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/reg-promote.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -allow-data-races -S %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@u = dso_local global i32 0, align 4
+@v = dso_local global i32 0, align 4
+@restrict = dso_local global i32 0, align 4
+
+; Function Attrs: mustprogress nounwind uwtable
+define dso_local void @_Z1fPiS_i(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 {
+; CHECK-LABEL: @_Z1fPiS_i(
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8, !tbaa [[TBAA10:![0-9]+]]
+; CHECK-NEXT:    store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8, !tbaa [[TBAA10]]
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4, !tbaa [[TBAA14:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr @restrict, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr @restrict, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP7]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP7]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    br label [[TMP16:%.*]]
+; CHECK:       16:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ [[TMP40:%.*]], [[TMP39:%.*]] ], [ [[DOTPROMOTED]], [[TMP3:%.*]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp slt i32 [[TMP17]], [[TMP13]]
+; CHECK-NEXT:    br i1 [[TMP18]], label [[TMP20:%.*]], label [[TMP19:%.*]]
+; CHECK:       19:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[TMP16]] ]
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr [[TMP7]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    store i32 2, ptr [[TMP8]], align 4
+; CHECK-NEXT:    br label [[TMP41:%.*]]
+; CHECK:       20:
+; CHECK-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+; CHECK-NEXT:    br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP28:%.*]]
+; CHECK:       25:
+; CHECK-NEXT:    [[DOTLCSSA1:%.*]] = phi i32 [ [[TMP17]], [[TMP20]] ]
+; CHECK-NEXT:    store i32 [[DOTLCSSA1]], ptr [[TMP7]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr @u, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP27:%.*]] = add nsw i32 [[TMP26]], 1
+; CHECK-NEXT:    store i32 [[TMP27]], ptr @u, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    store i32 2, ptr [[TMP8]], align 4
+; CHECK-NEXT:    br label [[TMP41]]
+; CHECK:       28:
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr @u, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP30:%.*]] = add nsw i32 [[TMP29]], 1
+; CHECK-NEXT:    store i32 [[TMP30]], ptr @u, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP31:%.*]] = sext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+; CHECK-NEXT:    br i1 [[TMP34]], label [[TMP35:%.*]], label [[TMP38:%.*]]
+; CHECK:       35:
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr @v, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    [[TMP37:%.*]] = add nsw i32 [[TMP36]], 1
+; CHECK-NEXT:    store i32 [[TMP37]], ptr @v, align 4, !tbaa [[TBAA14]]
+; CHECK-NEXT:    br label [[TMP38]]
+; CHECK:       38:
+; CHECK-NEXT:    br label [[TMP39]]
+; CHECK:       39:
+; CHECK-NEXT:    [[TMP40]] = add nsw i32 [[TMP17]], 1
+; CHECK-NEXT:    br label [[TMP16]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK:       41:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP7]]) #[[ATTR2]]
+; CHECK-NEXT:    br label [[TMP42:%.*]]
+; CHECK:       42:
+; CHECK-NEXT:    ret void
+;
+  %4 = alloca ptr, align 8
+  %5 = alloca ptr, align 8
+  %6 = alloca i32, align 4
+  %7 = alloca i32, align 4
+  %8 = alloca i32, align 4
+  store ptr %0, ptr %4, align 8, !tbaa !10
+  store ptr %1, ptr %5, align 8, !tbaa !10
+  store i32 %2, ptr %6, align 4, !tbaa !14
+  %9 = load i32, ptr @restrict, align 4, !tbaa !14
+  %10 = zext i32 %9 to i64
+  %11 = load i32, ptr @restrict, align 4, !tbaa !14
+  %12 = zext i32 %11 to i64
+  call void @llvm.lifetime.start.p0(i64 4, ptr %7) #2
+  store i32 0, ptr %7, align 4, !tbaa !14
+  br label %13
+
+13:                                               ; preds = %41, %3
+  %14 = load i32, ptr %7, align 4, !tbaa !14
+  %15 = load i32, ptr %6, align 4, !tbaa !14
+  %16 = icmp slt i32 %14, %15
+  br i1 %16, label %18, label %17
+
+17:                                               ; preds = %13
+  store i32 2, ptr %8, align 4
+  br label %44
+
+18:                                               ; preds = %13
+  %19 = load ptr, ptr %4, align 8, !tbaa !10
+  %20 = load i32, ptr %7, align 4, !tbaa !14
+  %21 = sext i32 %20 to i64
+  %22 = getelementptr inbounds i32, ptr %19, i64 %21
+  %23 = load i32, ptr %22, align 4, !tbaa !14
+  %24 = icmp ne i32 %23, 0
+  br i1 %24, label %25, label %28
+
+25:                                               ; preds = %18
+  %26 = load i32, ptr @u, align 4, !tbaa !14
+  %27 = add nsw i32 %26, 1
+  store i32 %27, ptr @u, align 4, !tbaa !14
+  store i32 2, ptr %8, align 4
+  br label %44
+
+28:                                               ; preds = %18
+  %29 = load i32, ptr @u, align 4, !tbaa !14
+  %30 = add nsw i32 %29, 1
+  store i32 %30, ptr @u, align 4, !tbaa !14
+  %31 = load ptr, ptr %5, align 8, !tbaa !10
+  %32 = load i32, ptr %7, align 4, !tbaa !14
+  %33 = sext i32 %32 to i64
+  %34 = getelementptr inbounds i32, ptr %31, i64 %33
+  %35 = load i32, ptr %34, align 4, !tbaa !14
+  %36 = icmp ne i32 %35, 0
+  br i1 %36, label %37, label %40
+
+37:                                               ; preds = %28
+  %38 = load i32, ptr @v, align 4, !tbaa !14
+  %39 = add nsw i32 %38, 1
+  store i32 %39, ptr @v, align 4, !tbaa !14
+  br label %40
+
+40:                                               ; preds = %37, %28
+  br label %41
+
+41:                                               ; preds = %40
+  %42 = load i32, ptr %7, align 4, !tbaa !14
+  %43 = add nsw i32 %42, 1
+  store i32 %43, ptr %7, align 4, !tbaa !14
+  br label %13, !llvm.loop !16
+
+44:                                               ; preds = %25, %17
+  call void @llvm.lifetime.end.p0(i64 4, ptr %7) #2
+  br label %45
+
+45:                                               ; preds = %44
+  ret void
+}
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { mustprogress nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+outline-atomics,+v8a" "unsafe-fp-math"="true" }
+attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"branch-target-enforcement", i32 0}
+!2 = !{i32 8, !"sign-return-address", i32 0}
+!3 = !{i32 8, !"sign-return-address-all", i32 0}
+!4 = !{i32 8, !"sign-return-address-with-bkey", i32 0}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 1}
+!9 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6aff1b7b3ca71e048375a85a11e611527da6a45c)"}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"any pointer", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C++ TBAA"}
+!14 = !{!15, !15, i64 0}
+!15 = !{!"int", !12, i64 0}
+!16 = distinct !{!16, !17}
+!17 = !{!"llvm.loop.mustprogress"}