Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -28,6 +28,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InstructionCost.h"
 #include <functional>
 #include <utility>
@@ -67,6 +68,11 @@
 struct KnownBits;
 template <typename T> class Optional;
 
+/// Command-line flag "-allow-data-races" (defined in
+/// lib/Transforms/Scalar/LICM.cpp). When set, LICM treats inserting a store
+/// on a path that did not originally have one as safe.
+extern cl::opt<bool> AllowDataRaces;
+
 /// Information about a load/store intrinsic defined by the target.
 struct MemIntrinsicInfo {
   /// This is the pointer that the intrinsic is loading from or storing to.
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -88,6 +88,11 @@
 namespace llvm {
 class BlockFrequencyInfo;
 class LPMUpdater;
+/// Off by default. When enabled, LICM considers it safe to insert a store on
+/// paths that originally had none, even though doing so can introduce a data
+/// race that the source program did not contain.
+cl::opt<bool> AllowDataRaces("allow-data-races", cl::Hidden, cl::init(false),
+                             cl::desc("Allow data races in LICM pass"));
 } // namespace llvm
 
 #define DEBUG_TYPE "licm"
@@ -2111,7 +2116,9 @@
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
+    // Besides thread-local objects, -allow-data-races also permits the store:
+    // the user has explicitly accepted the potential race.
+    if (IsKnownThreadLocalObject || llvm::AllowDataRaces)
       SafeToInsertStore = true;
     else {
       Value *Object = getUnderlyingObject(SomePtr);
Index: llvm/test/Transforms/LICM/reg-promote.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/reg-promote.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -allow-data-races -S %s | FileCheck %s
+
+; Runs LICM with -allow-data-races over a loop that increments @u on every
+; iteration, increments @v conditionally, and leaves early when the element
+; read through %0 is non-zero. In the expected output only the loads of the
+; argument allocas are hoisted out of the loop; the accesses to @u, @v and @i
+; stay inside it.
+
+target triple = "aarch64-unknown-linux"
+
+@u = dso_local global i32 0, align 4
+@v = dso_local global i32 0, align 4
+@restrict = dso_local global i32 0, align 4
+@i = dso_local global i32 0, align 4
+
+; Function Attrs: mustprogress nounwind uwtable
+define dso_local void @_Z1fPiS_i(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 {
+; CHECK-LABEL: @_Z1fPiS_i(
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr @restrict, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr @restrict, align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT:    store i32 0, ptr @i, align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP5]], align 8
+; CHECK-NEXT:    br label [[TMP14:%.*]]
+; CHECK:       14:
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP17:%.*]], label [[DOTLOOPEXIT:%.*]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; CHECK-NEXT:    br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP26:%.*]]
+; CHECK:       23:
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = add nsw i32 [[TMP24]], 1
+; CHECK-NEXT:    store i32 [[TMP25]], ptr @u, align 4
+; CHECK-NEXT:    br label [[TMP41:%.*]]
+; CHECK:       26:
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr @u, align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = sext i32 [[TMP29]] to i64
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP30]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+; CHECK-NEXT:    br i1 [[TMP33]], label [[TMP34:%.*]], label [[TMP37:%.*]]
+; CHECK:       34:
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1
+; CHECK-NEXT:    store i32 [[TMP36]], ptr @v, align 4
+; CHECK-NEXT:    br label [[TMP37]]
+; CHECK:       37:
+; CHECK-NEXT:    br label [[TMP38:%.*]]
+; CHECK:       38:
+; CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr @i, align 4
+; CHECK-NEXT:    [[TMP40:%.*]] = add nsw i32 [[TMP39]], 1
+; CHECK-NEXT:    store i32 [[TMP40]], ptr @i, align 4
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       .loopexit:
+; CHECK-NEXT:    br label [[TMP41]]
+; CHECK:       41:
+; CHECK-NEXT:    ret void
+;
+  %4 = alloca ptr, align 8
+  %5 = alloca ptr, align 8
+  %6 = alloca i32, align 4
+  store ptr %0, ptr %4, align 8
+  store ptr %1, ptr %5, align 8
+  store i32 %2, ptr %6, align 4
+  %7 = load i32, ptr @restrict, align 4
+  %8 = zext i32 %7 to i64
+  %9 = load i32, ptr @restrict, align 4
+  %10 = zext i32 %9 to i64
+  store i32 0, ptr @i, align 4
+  br label %11
+
+11:                                               ; preds = %38, %3
+  %12 = load i32, ptr @i, align 4
+  %13 = load i32, ptr %6, align 4
+  %14 = icmp slt i32 %12, %13
+  br i1 %14, label %15, label %41
+
+15:                                               ; preds = %11
+  %16 = load ptr, ptr %4, align 8
+  %17 = load i32, ptr @i, align 4
+  %18 = sext i32 %17 to i64
+  %19 = getelementptr inbounds i32, ptr %16, i64 %18
+  %20 = load i32, ptr %19, align 4
+  %21 = icmp ne i32 %20, 0
+  br i1 %21, label %22, label %25
+
+22:                                               ; preds = %15
+  %23 = load i32, ptr @u, align 4
+  %24 = add nsw i32 %23, 1
+  store i32 %24, ptr @u, align 4
+  br label %41
+
+25:                                               ; preds = %15
+  %26 = load i32, ptr @u, align 4
+  %27 = add nsw i32 %26, 1
+  store i32 %27, ptr @u, align 4
+  %28 = load ptr, ptr %5, align 8
+  %29 = load i32, ptr @i, align 4
+  %30 = sext i32 %29 to i64
+  %31 = getelementptr inbounds i32, ptr %28, i64 %30
+  %32 = load i32, ptr %31, align 4
+  %33 = icmp ne i32 %32, 0
+  br i1 %33, label %34, label %37
+
+34:                                               ; preds = %25
+  %35 = load i32, ptr @v, align 4
+  %36 = add nsw i32 %35, 1
+  store i32 %36, ptr @v, align 4
+  br label %37
+
+37:                                               ; preds = %34, %25
+  br label %38
+
+38:                                               ; preds = %37
+  %39 = load i32, ptr @i, align 4
+  %40 = add nsw i32 %39, 1
+  store i32 %40, ptr @i, align 4
+  br label %11
+
+41:                                               ; preds = %22, %11
+  ret void
+}