# NOTE(review): patch-level commentary; these lines precede the first "Index:" header and carry no hunk content.
# NOTE(review): In the LICM.cpp hunk the new condition is `IsKnownThreadLocalObject || ThreadModel::Single`.
#   `ThreadModel::Single` is used as a bare operand; it is never compared against a configured thread model.
#   Presumably it is the non-zero enumerator declared in the newly included llvm/Target/TargetOptions.h
#   (TODO confirm); if so the disjunction is always true and SafeToInsertStore is set for every pointer that
#   reaches this branch, whatever thread model the target actually uses.
# NOTE(review): The FileCheck hunks below (hoist-load-without-store.ll, promote-capture.ll,
#   scalar-promote-memmodel.ll, scalar-promote-opaque-ptrs.ll, scalar-promote.ll) each delete a conditional
#   in-loop store and add a store in an exit block. No hunk shows a RUN-line change, so these tests look like
#   they now get promotion unconditionally -- verify against the full test files.
# NOTE(review): The added reg-promote.ll runs `opt -licm -S %s` with no thread-model option, and its CHECK
#   lines still keep the load/add/store of @u, @v and @i inside the loop blocks, so it does not appear to
#   demonstrate the promotion this patch is meant to enable -- confirm the intended RUN line and expectations.
# NOTE(review): A fix likely needs the real thread model made available to this code and tested with
#   `== ThreadModel::Single`; that plumbing lies outside the hunks visible here.
Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -2111,7 +2112,7 @@ // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { - if (IsKnownThreadLocalObject) + if (IsKnownThreadLocalObject || ThreadModel::Single) SafeToInsertStore = true; else { Value *Object = getUnderlyingObject(SomePtr); Index: llvm/test/Transforms/LICM/hoist-load-without-store.ll =================================================================== --- llvm/test/Transforms/LICM/hoist-load-without-store.ll +++ llvm/test/Transforms/LICM/hoist-load-without-store.ll @@ -26,13 +26,15 @@ ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]] ; CHECK: if.end: -; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP1_CRIT_EDGE:%.*]] ; CHECK: for.body.cleanup1_crit_edge: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[CLEANUP1]] ; CHECK: for.cond.cleanup1_crit_edge: +; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[CLEANUP1]] ; CHECK: cleanup1: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/LICM/promote-capture.ll =================================================================== --- 
llvm/test/Transforms/LICM/promote-capture.ll +++ llvm/test/Transforms/LICM/promote-capture.ll @@ -120,7 +120,6 @@ ; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[LATCH]] ; CHECK: if: ; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C_INC2]], 1 -; CHECK-NEXT: store i32 [[C_INC]], i32* [[COUNT]], align 4 ; CHECK-NEXT: br label [[LATCH]] ; CHECK: latch: ; CHECK-NEXT: [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ] @@ -128,6 +127,8 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: +; CHECK-NEXT: [[C_INC1_LCSSA:%.*]] = phi i32 [ [[C_INC1]], [[LATCH]] ] +; CHECK-NEXT: store i32 [[C_INC1_LCSSA]], i32* [[COUNT]], align 4 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/Transforms/LICM/reg-promote.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/reg-promote.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S %s | FileCheck %s + +target triple = "aarch64-unknown-linux" + +@u = dso_local global i32 0, align 4 +@v = dso_local global i32 0, align 4 +@restrict = dso_local global i32 0, align 4 +@i = dso_local global i32 0, align 4 + +; Function Attrs: mustprogress nounwind uwtable +define dso_local void @_Z1fPiS_i(ptr noundef %0, ptr noundef %1, i32 noundef %2) #0 { +; CHECK-LABEL: @_Z1fPiS_i( +; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8 +; CHECK-NEXT: store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @restrict, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr @restrict, align 4 +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 
[[TMP9]] to i64 +; CHECK-NEXT: store i32 0, ptr @i, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP5]], align 8 +; CHECK-NEXT: br label [[TMP14:%.*]] +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[DOTLOOPEXIT:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP26:%.*]] +; CHECK: 23: +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP25:%.*]] = add nsw i32 [[TMP24]], 1 +; CHECK-NEXT: store i32 [[TMP25]], ptr @u, align 4 +; CHECK-NEXT: br label [[TMP41:%.*]] +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1 +; CHECK-NEXT: store i32 [[TMP28]], ptr @u, align 4 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: [[TMP30:%.*]] = sext i32 [[TMP29]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: br i1 [[TMP33]], label [[TMP34:%.*]], label [[TMP37:%.*]] +; CHECK: 34: +; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1 +; CHECK-NEXT: store i32 [[TMP36]], ptr @v, align 4 +; CHECK-NEXT: br label [[TMP37]] +; CHECK: 37: +; CHECK-NEXT: br label [[TMP38:%.*]] +; CHECK: 38: +; CHECK-NEXT: 
[[TMP39:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: [[TMP40:%.*]] = add nsw i32 [[TMP39]], 1 +; CHECK-NEXT: store i32 [[TMP40]], ptr @i, align 4 +; CHECK-NEXT: br label [[TMP14]] +; CHECK: .loopexit: +; CHECK-NEXT: br label [[TMP41]] +; CHECK: 41: +; CHECK-NEXT: ret void +; + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 4 + %7 = load i32, ptr @restrict, align 4 + %8 = zext i32 %7 to i64 + %9 = load i32, ptr @restrict, align 4 + %10 = zext i32 %9 to i64 + store i32 0, ptr @i, align 4 + br label %11 + +11: ; preds = %38, %3 + %12 = load i32, ptr @i, align 4 + %13 = load i32, ptr %6, align 4 + %14 = icmp slt i32 %12, %13 + br i1 %14, label %15, label %41 + +15: ; preds = %11 + %16 = load ptr, ptr %4, align 8 + %17 = load i32, ptr @i, align 4 + %18 = sext i32 %17 to i64 + %19 = getelementptr inbounds i32, ptr %16, i64 %18 + %20 = load i32, ptr %19, align 4 + %21 = icmp ne i32 %20, 0 + br i1 %21, label %22, label %25 + +22: ; preds = %15 + %23 = load i32, ptr @u, align 4 + %24 = add nsw i32 %23, 1 + store i32 %24, ptr @u, align 4 + br label %41 + +25: ; preds = %15 + %26 = load i32, ptr @u, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, ptr @u, align 4 + %28 = load ptr, ptr %5, align 8 + %29 = load i32, ptr @i, align 4 + %30 = sext i32 %29 to i64 + %31 = getelementptr inbounds i32, ptr %28, i64 %30 + %32 = load i32, ptr %31, align 4 + %33 = icmp ne i32 %32, 0 + br i1 %33, label %34, label %37 + +34: ; preds = %25 + %35 = load i32, ptr @v, align 4 + %36 = add nsw i32 %35, 1 + store i32 %36, ptr @v, align 4 + br label %37 + +37: ; preds = %34, %25 + br label %38 + +38: ; preds = %37 + %39 = load i32, ptr @i, align 4 + %40 = add nsw i32 %39, 1 + store i32 %40, ptr @i, align 4 + br label %11 + +41: ; preds = %22, %11 + ret void +} Index: llvm/test/Transforms/LICM/scalar-promote-memmodel.ll 
=================================================================== --- llvm/test/Transforms/LICM/scalar-promote-memmodel.ll +++ llvm/test/Transforms/LICM/scalar-promote-memmodel.ll @@ -22,13 +22,14 @@ ; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[INC2]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* @g, align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INC5]] = add nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: +; CHECK-NEXT: [[INC2_LCSSA:%.*]] = phi i32 [ [[INC2]], [[FOR_COND]] ] +; CHECK-NEXT: store i32 [[INC2_LCSSA]], i32* @g, align 4 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll +++ llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll @@ -323,7 +323,6 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] ; CHECK: if: -; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] @@ -331,6 +330,8 @@ ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: +; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], [[ELSE]] ] +; CHECK-NEXT: store i32 [[X21_LCSSA]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] ; Index: llvm/test/Transforms/LICM/scalar-promote.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote.ll +++ llvm/test/Transforms/LICM/scalar-promote.ll @@ -325,7 +325,6 @@ ; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32 [[X2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] ; CHECK: if: -; CHECK-NEXT: store i32 [[X2]], i32* [[LOCAL]], align 4 ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: ; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] @@ -333,6 +332,8 @@ ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: +; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], [[ELSE]] ] +; CHECK-NEXT: store i32 [[X21_LCSSA]], i32* [[LOCAL]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[LOCAL]], align 4 ; CHECK-NEXT: ret i32 [[RET]] ; @@ -611,11 +612,12 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: store i8 [[INC1]], i8* [[PTR]], align 1 ; CHECK-NEXT: [[ADD]] = add i8 [[I]], [[INC1]] ; CHECK-NEXT: br label [[LOOP_HEADER]] ; CHECK: exit: +; CHECK-NEXT: [[INC1_LCSSA:%.*]] = phi i8 [ [[INC1]], [[LOOP_HEADER]] ] ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i8 [ [[I]], [[LOOP_HEADER]] ] +; CHECK-NEXT: store i8 [[INC1_LCSSA]], i8* [[PTR]], align 1 ; CHECK-NEXT: ret i8 [[I_LCSSA]] ; entry: