diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -290,11 +290,6 @@ LPM1.addPass(LoopInstSimplifyPass()); LPM1.addPass(LoopSimplifyCFGPass()); - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -450,10 +450,6 @@ MPM.add(createLoopInstSimplifyPass()); MPM.add(createLoopSimplifyCFGPass()); } - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -129,20 +129,20 @@ ; GCN-O1-NEXT: Simplify the CFG ; GCN-O1-NEXT: Reassociate expressions ; GCN-O1-NEXT: Dominator Tree Construction -; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-NEXT: Function Alias Analysis Results -; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Canonicalize natural loops ; GCN-O1-NEXT: LCSSA Verifier ; GCN-O1-NEXT: Loop-Closed SSA Form Pass +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Scalar Evolution Analysis +; GCN-O1-NEXT: Loop Pass Manager +; GCN-O1-NEXT: Rotate Loops +; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Lazy Branch Probability Analysis ; GCN-O1-NEXT: Lazy Block Frequency Analysis ; GCN-O1-NEXT: Loop Pass Manager ; GCN-O1-NEXT: Loop Invariant Code Motion -; GCN-O1-NEXT: Rotate Loops -; GCN-O1-NEXT: Loop Invariant Code Motion ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Legacy Divergence Analysis ; GCN-O1-NEXT: Loop Pass Manager @@ -456,20 +456,20 @@ ; GCN-O2-NEXT: Simplify the CFG ; GCN-O2-NEXT: Reassociate expressions ; GCN-O2-NEXT: Dominator Tree Construction -; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Loop Pass Manager +; GCN-O2-NEXT: Rotate Loops +; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Lazy Branch Probability Analysis ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Loop Pass Manager ; GCN-O2-NEXT: Loop Invariant Code Motion -; GCN-O2-NEXT: Rotate Loops -; GCN-O2-NEXT: Loop Invariant Code Motion ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Legacy Divergence Analysis ; GCN-O2-NEXT: Loop Pass Manager @@ -820,20 +820,20 @@ ; GCN-O3-NEXT: Simplify the CFG ; GCN-O3-NEXT: Reassociate expressions ; GCN-O3-NEXT: Dominator Tree Construction -; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Loop Pass Manager +; GCN-O3-NEXT: Rotate Loops +; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Lazy Branch Probability Analysis ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Loop Pass Manager ; GCN-O3-NEXT: Loop Invariant Code Motion -; GCN-O3-NEXT: Rotate Loops -; GCN-O3-NEXT: Loop Invariant Code Motion ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Legacy Divergence Analysis ; GCN-O3-NEXT: Loop Pass Manager diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -167,7 +167,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -133,7 +133,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -106,7 +106,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -115,7 +115,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -144,7 +144,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -110,7 +110,7 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM +; CHECK-O23SZ-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -22,33 +22,30 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4 -; CHECK-NEXT: [[D_PROMOTED7:%.*]] = load i32, i32* @d, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[D_PROMOTED7]], [[I6]] +; CHECK-NEXT: [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[D_PROMOTED9]], [[I6]] ; CHECK-NEXT: [[I21:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[I21]], label [[BB27_THREAD:%.*]], label [[BB27_PREHEADER:%.*]] -; CHECK: bb27.preheader: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[I21]], label [[BB13_PREHEADER_BB27_THREAD_SPLIT_CRIT_EDGE:%.*]], label [[BB19_PREHEADER:%.*]] +; CHECK: bb19.preheader: ; CHECK-NEXT: [[I26:%.*]] = urem i32 [[I24]], [[TMP0]] ; CHECK-NEXT: store i32 [[I26]], i32* @e, align 4 ; CHECK-NEXT: [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0 -; CHECK-NEXT: br label [[BB27:%.*]] -; CHECK: bb27.thread: -; CHECK-NEXT: store i32 0, i32* @d, align 4 +; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]] +; CHECK: bb13.preheader.bb27.thread.split_crit_edge: ; CHECK-NEXT: store i32 -1, i32* @f, align 4 +; CHECK-NEXT: store i32 0, i32* @d, align 4 ; CHECK-NEXT: store i32 0, i32* @c, align 4 ; CHECK-NEXT: br label [[BB32:%.*]] -; CHECK: bb27: -; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB36:%.*]] ; CHECK: bb32.loopexit: -; CHECK-NEXT: store i32 [[TMP0]], i32* @d, align 4 ; CHECK-NEXT: store i32 -1, i32* @f, align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* @d, align 4 ; CHECK-NEXT: br label [[BB32]] ; CHECK: bb32: -; CHECK-NEXT: [[C_SINK:%.*]] = phi i32* [ @c, [[BB32_LOOPEXIT]] ], [ @e, [[BB27_THREAD]] ] +; CHECK-NEXT: [[C_SINK:%.*]] = phi i32* [ @c, [[BB32_LOOPEXIT]] ], [ @e, [[BB13_PREHEADER_BB27_THREAD_SPLIT_CRIT_EDGE]] ] ; CHECK-NEXT: store i32 0, i32* [[C_SINK]], align 4 ; CHECK-NEXT: ret i32 0 -; CHECK: bb36: -; CHECK-NEXT: store i32 1, i32* @c, align 4 -; CHECK-NEXT: br i1 [[I21]], label [[BB27_THREAD]], label [[BB27]] ; bb: %i = alloca i32, align 4 diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -6,21 +6,21 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) { ; CHECK-A55-LABEL: @runtime_unroll_generic( ; CHECK-A55-NEXT: entry: +; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 +; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]] +; CHECK-A55: for.body6.lr.ph: ; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef -; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]] -; CHECK-A55: for.body6.preheader: ; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1 ; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3 ; CHECK-A55-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-A55-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW:%.*]] -; CHECK-A55: for.body6.preheader.new: +; CHECK-A55-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]] +; CHECK-A55: for.body6.lr.ph.new: ; CHECK-A55-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4 ; CHECK-A55-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-A55: for.body6: -; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ] +; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ] ; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-A55-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 @@ -93,13 +93,15 @@ ; ; CHECK-GENERIC-LABEL: @runtime_unroll_generic( ; CHECK-GENERIC-NEXT: entry: +; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]] +; CHECK-GENERIC: for.body6.lr.ph: ; CHECK-GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef -; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-GENERIC: for.body6: -; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY6]] ] ; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -14,13 +14,15 @@ define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { ; OLDPM_O1-LABEL: @licm( ; OLDPM_O1-NEXT: entry: -; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O1: for.body.lr.ph: +; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O1: for.body: -; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; OLDPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; OLDPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -29,12 +31,12 @@ ; ; OLDPM_O2-LABEL: @licm( ; OLDPM_O2-NEXT: entry: -; OLDPM_O2-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O2-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O2-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; OLDPM_O2: for.body.preheader: +; OLDPM_O2-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O2: for.body.lr.ph: +; OLDPM_O2-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; OLDPM_O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; OLDPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; OLDPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; OLDPM_O2: vector.ph: ; OLDPM_O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; OLDPM_O2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -42,37 +44,37 @@ ; OLDPM_O2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OLDPM_O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; OLDPM_O2-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; OLDPM_O2-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O2-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; OLDPM_O2-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; OLDPM_O2-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; OLDPM_O2-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; OLDPM_O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; OLDPM_O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; OLDPM_O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; OLDPM_O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; OLDPM_O2: middle.block: ; OLDPM_O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; OLDPM_O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; OLDPM_O2: for.body.preheader3: -; OLDPM_O2-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; OLDPM_O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; OLDPM_O2: for.body.preheader: +; OLDPM_O2-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; OLDPM_O2-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O2: for.body: -; OLDPM_O2-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; OLDPM_O2-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; OLDPM_O2-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O2-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; OLDPM_O2-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; OLDPM_O2-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; OLDPM_O2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; OLDPM_O2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; OLDPM_O2: for.cond.cleanup: ; OLDPM_O2-NEXT: ret void ; ; OLDPM_O3-LABEL: @licm( ; OLDPM_O3-NEXT: entry: -; OLDPM_O3-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O3-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O3-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; OLDPM_O3: for.body.preheader: +; OLDPM_O3-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O3: for.body.lr.ph: +; OLDPM_O3-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; OLDPM_O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; OLDPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; OLDPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; OLDPM_O3: vector.ph: ; OLDPM_O3-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; OLDPM_O3-NEXT: br label [[VECTOR_BODY:%.*]] @@ -80,38 +82,40 @@ ; OLDPM_O3-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OLDPM_O3-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; OLDPM_O3-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; OLDPM_O3-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O3-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; OLDPM_O3-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; OLDPM_O3-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; OLDPM_O3-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; OLDPM_O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; OLDPM_O3-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; OLDPM_O3-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; OLDPM_O3-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; OLDPM_O3: middle.block: ; OLDPM_O3-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; OLDPM_O3-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; OLDPM_O3: for.body.preheader3: -; OLDPM_O3-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; OLDPM_O3-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; OLDPM_O3: for.body.preheader: +; OLDPM_O3-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; OLDPM_O3-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O3: for.body: -; OLDPM_O3-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; OLDPM_O3-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; OLDPM_O3-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O3-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; OLDPM_O3-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; OLDPM_O3-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; OLDPM_O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; OLDPM_O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; OLDPM_O3: for.cond.cleanup: ; OLDPM_O3-NEXT: ret void ; ; NEWPM_O1-LABEL: @licm( ; NEWPM_O1-NEXT: entry: -; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; NEWPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; NEWPM_O1: for.body.lr.ph: +; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; NEWPM_O1: for.body: -; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; NEWPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; NEWPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -27,28 +27,32 @@ ; OLDPM_O2-NEXT: entry: ; OLDPM_O2-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; OLDPM_O2-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; OLDPM_O2-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; OLDPM_O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; OLDPM_O2-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; OLDPM_O2-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; OLDPM_O2-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; OLDPM_O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; OLDPM_O2-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; OLDPM_O2-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; OLDPM_O2: for.body7.lr.ph.i: ; OLDPM_O2-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; OLDPM_O2-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; OLDPM_O2-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; OLDPM_O2-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; OLDPM_O2-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] -; OLDPM_O2-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; OLDPM_O2-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]] -; OLDPM_O2-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; OLDPM_O2-NEXT: [[DOTPRE_I:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; OLDPM_O2-NEXT: br label [[FOR_BODY7_I:%.*]] +; OLDPM_O2-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] +; OLDPM_O2-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; OLDPM_O2-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; OLDPM_O2-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; OLDPM_O2-NEXT: [[BASE_I6_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; OLDPM_O2-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I6_PEEL_I]], align 8, !tbaa [[TBAA8]] +; OLDPM_O2-NEXT: [[ARRAYIDX_I7_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; OLDPM_O2-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; OLDPM_O2-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O2-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O2-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O2-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; OLDPM_O2-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] ; OLDPM_O2: for.body7.i: -; OLDPM_O2-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O2-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O2-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O2-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O2-NEXT: [[TMP7:%.*]] = phi float [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ], [ [[ADD_PEEL_I]], [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O2-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O2-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O2-NEXT: [[ADD_I]] = fadd float [[TMP7]], [[TMP8]] ; OLDPM_O2-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O2-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; OLDPM_O2-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; OLDPM_O2-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; OLDPM_O2-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; OLDPM_O2: _ZN12FloatVecPair6vecIncEv.exit: @@ -59,28 +63,32 @@ ; OLDPM_O3-NEXT: entry: ; OLDPM_O3-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; OLDPM_O3-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; OLDPM_O3-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; OLDPM_O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; OLDPM_O3-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; OLDPM_O3-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; OLDPM_O3-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; OLDPM_O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; OLDPM_O3-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; OLDPM_O3-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; OLDPM_O3: for.body7.lr.ph.i: ; OLDPM_O3-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; OLDPM_O3-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; OLDPM_O3-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; OLDPM_O3-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; OLDPM_O3-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] -; OLDPM_O3-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; OLDPM_O3-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]] -; OLDPM_O3-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; OLDPM_O3-NEXT: [[DOTPRE_I:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; OLDPM_O3-NEXT: br label [[FOR_BODY7_I:%.*]] +; OLDPM_O3-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] +; OLDPM_O3-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; OLDPM_O3-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; OLDPM_O3-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; OLDPM_O3-NEXT: [[BASE_I6_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; OLDPM_O3-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I6_PEEL_I]], align 8, !tbaa [[TBAA8]] +; OLDPM_O3-NEXT: [[ARRAYIDX_I7_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; OLDPM_O3-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; OLDPM_O3-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O3-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O3-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O3-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; OLDPM_O3-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] ; OLDPM_O3: for.body7.i: -; OLDPM_O3-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O3-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O3-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O3-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O3-NEXT: [[TMP7:%.*]] = phi float [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ], [ [[ADD_PEEL_I]], [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O3-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O3-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O3-NEXT: [[ADD_I]] = fadd float [[TMP7]], [[TMP8]] ; OLDPM_O3-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O3-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; OLDPM_O3-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; OLDPM_O3-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; OLDPM_O3-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; OLDPM_O3: _ZN12FloatVecPair6vecIncEv.exit: @@ -91,27 +99,32 @@ ; NEWPM_O1-NEXT: entry: ; NEWPM_O1-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; NEWPM_O1-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; NEWPM_O1-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; NEWPM_O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; NEWPM_O1-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; NEWPM_O1-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; NEWPM_O1-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; NEWPM_O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; NEWPM_O1-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; NEWPM_O1-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM_O1: for.body7.lr.ph.i: ; NEWPM_O1-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; NEWPM_O1-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; NEWPM_O1-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; NEWPM_O1-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; NEWPM_O1-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] -; NEWPM_O1-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; NEWPM_O1-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] -; NEWPM_O1-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; NEWPM_O1-NEXT: br label [[FOR_BODY7_I:%.*]] -; NEWPM_O1: for.body7.i: -; NEWPM_O1-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; NEWPM_O1-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; NEWPM_O1-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] +; NEWPM_O1-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; NEWPM_O1-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; NEWPM_O1-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; NEWPM_O1-NEXT: [[BASE_I4_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; NEWPM_O1-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I4_PEEL_I]], align 8, !tbaa [[TBAA8]] +; NEWPM_O1-NEXT: [[ARRAYIDX_I5_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; NEWPM_O1-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] ; NEWPM_O1-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O1-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O1-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; NEWPM_O1-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] +; NEWPM_O1: for.body7.i: +; NEWPM_O1-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; NEWPM_O1-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP7]], [[TMP8]] ; NEWPM_O1-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O1-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; NEWPM_O1-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; NEWPM_O1-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; NEWPM_O1: _ZN12FloatVecPair6vecIncEv.exit: @@ -194,16 +207,18 @@ ; OLDPM_O1-SAME: (%class.FloatVecPair* [[THIS:%.*]]) local_unnamed_addr comdat align 2 { ; OLDPM_O1-NEXT: entry: ; OLDPM_O1-NEXT: [[VSRC23:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[THIS]], i64 0, i32 1 -; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 ; OLDPM_O1-NEXT: [[CALL2:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[SIZE43:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[CALL2]], i64 0, i32 1 ; OLDPM_O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE43]], align 8, !tbaa [[TBAA0:![0-9]+]] ; OLDPM_O1-NEXT: [[CMP54_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7_LR_PH:%.*]] +; OLDPM_O1: for.body7.lr.ph: +; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 +; OLDPM_O1-NEXT: br label [[FOR_BODY7:%.*]] ; OLDPM_O1: for.cond.cleanup6: ; OLDPM_O1-NEXT: ret void ; OLDPM_O1: for.body7: -; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY7]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY7]] ] ; OLDPM_O1-NEXT: [[CALL9:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[CALL10:%.*]] = call float* @_ZN14HomemadeVectorIfLj8EEixEj(%class.HomemadeVector.0* [[CALL9]]) ; OLDPM_O1-NEXT: [[TMP1:%.*]] = load float, float* [[CALL10]], align 4, !tbaa [[TBAA6:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -77,19 +77,19 @@ ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: ; ROTATED_LATER_OLDPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; ROTATED_LATER_OLDPM: for.cond.cleanup: +; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_COND:%.*]] +; ROTATED_LATER_OLDPM: for.cond: +; ROTATED_LATER_OLDPM-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] ; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() +; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; ROTATED_LATER_OLDPM: for.cond.cleanup: ; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() ; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_OLDPM: for.body: -; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() -; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] -; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_0]], 1 +; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_COND]] ; ROTATED_LATER_OLDPM: return: ; ROTATED_LATER_OLDPM-NEXT: ret void ;