diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -107,7 +107,7 @@
                                             cl::init(true));
 static cl::opt<bool>
 GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
-                                cl::init(true));
+                                cl::init(false));
 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
 
 static cl::opt<uint32_t> MaxNumDeps(
diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll
--- a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll
+++ b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loops -basic-aa -gvn -enable-load-pre -S | FileCheck %s
+; RUN: opt < %s -loops -basic-aa -gvn -enable-split-backedge-in-load-pre -S | FileCheck %s
 
 define dso_local void @test1(i32* nocapture readonly %aa, i32* nocapture %bb) local_unnamed_addr {
 ; CHECK-LABEL: @test1(
diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
--- a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
+++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes='require<loops>,loop(loop-simplifycfg),gvn' -S %s | FileCheck %s
+; RUN: opt -passes='require<loops>,loop(loop-simplifycfg),gvn' -enable-split-backedge-in-load-pre -S %s | FileCheck %s
 
 define i32 @test_pointer_phi_select_same_object(i32* %ptr, i32* %end) {
 ; CHECK-LABEL: @test_pointer_phi_select_same_object(
diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll
--- a/llvm/test/Transforms/GVN/PRE/rle.ll
+++ b/llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,LE
+; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,BE
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
diff --git a/llvm/test/Transforms/GVN/PRE/volatile.ll b/llvm/test/Transforms/GVN/PRE/volatile.ll
--- a/llvm/test/Transforms/GVN/PRE/volatile.ll
+++ b/llvm/test/Transforms/GVN/PRE/volatile.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Tests that check our handling of volatile instructions encountered
 ; when scanning for dependencies
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-split-backedge-in-load-pre -S < %s | FileCheck %s
 
 ; Check that we can bypass a volatile load when searching
 ; for dependencies of a non-volatile load
diff --git a/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll b/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
--- a/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
+++ b/llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=gvn -S %s | FileCheck %s
+; RUN: opt -passes=gvn -enable-split-backedge-in-load-pre -S %s | FileCheck %s
 
 ; Test case for PR31651.
 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
@@ -11,89 +11,26 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[FIRST:%.*]] = load i8, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    tail call void @use(i8 [[FIRST]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    [[VAL_EXT2:%.*]] = zext i8 [[FIRST]] to i16
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 [[VAL_EXT2]], i64 0
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ [[TMP0]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16>
-; CHECK-NEXT:    [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6]] = add <8 x i16> [[VEC_PHI5]], [[TMP4]]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP4]] = add <8 x i16> [[VEC_PHI]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5]] = add <8 x i16> [[VEC_PHI1]], [[TMP3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008
-; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       loop.loop_crit_edge:
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[BIN_RDX]])
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1009
-; CHECK-NEXT:    [[VAL_PRE:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1
-; CHECK-NEXT:    [[VAL_EXT:%.*]] = zext i8 [[VAL_PRE]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT:%.*]] = add i16 [[TMP8]], [[VAL_EXT]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1010
-; CHECK-NEXT:    [[VAL_PRE_1:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_1]], align 1
-; CHECK-NEXT:    [[VAL_EXT_1:%.*]] = zext i8 [[VAL_PRE_1]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_1:%.*]] = add i16 [[ACCUM_NEXT]], [[VAL_EXT_1]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1011
-; CHECK-NEXT:    [[VAL_PRE_2:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_2]], align 1
-; CHECK-NEXT:    [[VAL_EXT_2:%.*]] = zext i8 [[VAL_PRE_2]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_2:%.*]] = add i16 [[ACCUM_NEXT_1]], [[VAL_EXT_2]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1012
-; CHECK-NEXT:    [[VAL_PRE_3:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_3]], align 1
-; CHECK-NEXT:    [[VAL_EXT_3:%.*]] = zext i8 [[VAL_PRE_3]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_3:%.*]] = add i16 [[ACCUM_NEXT_2]], [[VAL_EXT_3]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1013
-; CHECK-NEXT:    [[VAL_PRE_4:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_4]], align 1
-; CHECK-NEXT:    [[VAL_EXT_4:%.*]] = zext i8 [[VAL_PRE_4]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_4:%.*]] = add i16 [[ACCUM_NEXT_3]], [[VAL_EXT_4]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1014
-; CHECK-NEXT:    [[VAL_PRE_5:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_5]], align 1
-; CHECK-NEXT:    [[VAL_EXT_5:%.*]] = zext i8 [[VAL_PRE_5]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_5:%.*]] = add i16 [[ACCUM_NEXT_4]], [[VAL_EXT_5]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1015
-; CHECK-NEXT:    [[VAL_PRE_6:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_6]], align 1
-; CHECK-NEXT:    [[VAL_EXT_6:%.*]] = zext i8 [[VAL_PRE_6]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_6:%.*]] = add i16 [[ACCUM_NEXT_5]], [[VAL_EXT_6]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1016
-; CHECK-NEXT:    [[VAL_PRE_7:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_7]], align 1
-; CHECK-NEXT:    [[VAL_EXT_7:%.*]] = zext i8 [[VAL_PRE_7]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_7:%.*]] = add i16 [[ACCUM_NEXT_6]], [[VAL_EXT_7]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_8:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1017
-; CHECK-NEXT:    [[VAL_PRE_8:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_8]], align 1
-; CHECK-NEXT:    [[VAL_EXT_8:%.*]] = zext i8 [[VAL_PRE_8]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_8:%.*]] = add i16 [[ACCUM_NEXT_7]], [[VAL_EXT_8]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_9:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1018
-; CHECK-NEXT:    [[VAL_PRE_9:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_9]], align 1
-; CHECK-NEXT:    [[VAL_EXT_9:%.*]] = zext i8 [[VAL_PRE_9]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_9:%.*]] = add i16 [[ACCUM_NEXT_8]], [[VAL_EXT_9]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_10:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1019
-; CHECK-NEXT:    [[VAL_PRE_10:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_10]], align 1
-; CHECK-NEXT:    [[VAL_EXT_10:%.*]] = zext i8 [[VAL_PRE_10]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_10:%.*]] = add i16 [[ACCUM_NEXT_9]], [[VAL_EXT_10]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1020
-; CHECK-NEXT:    [[VAL_PRE_11:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_11]], align 1
-; CHECK-NEXT:    [[VAL_EXT_11:%.*]] = zext i8 [[VAL_PRE_11]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_11:%.*]] = add i16 [[ACCUM_NEXT_10]], [[VAL_EXT_11]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_12:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1021
-; CHECK-NEXT:    [[VAL_PRE_12:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_12]], align 1
-; CHECK-NEXT:    [[VAL_EXT_12:%.*]] = zext i8 [[VAL_PRE_12]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_12:%.*]] = add i16 [[ACCUM_NEXT_11]], [[VAL_EXT_12]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_13:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1022
-; CHECK-NEXT:    [[VAL_PRE_13:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_13]], align 1
-; CHECK-NEXT:    [[VAL_EXT_13:%.*]] = zext i8 [[VAL_PRE_13]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_13:%.*]] = add i16 [[ACCUM_NEXT_12]], [[VAL_EXT_13]]
-; CHECK-NEXT:    [[GEP_PHI_TRANS_INSERT_14:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1023
-; CHECK-NEXT:    [[VAL_PRE_14:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT_14]], align 1
-; CHECK-NEXT:    [[VAL_EXT_14:%.*]] = zext i8 [[VAL_PRE_14]] to i16
-; CHECK-NEXT:    [[ACCUM_NEXT_14:%.*]] = add i16 [[ACCUM_NEXT_13]], [[VAL_EXT_14]]
-; CHECK-NEXT:    ret i16 [[ACCUM_NEXT_14]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[BIN_RDX]])
+; CHECK-NEXT:    ret i16 [[TMP7]]
 ;
 entry:
   %first = load i8, ptr %ptr