diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -292,11 +292,6 @@ LPM1.addPass(LoopInstSimplifyPass()); LPM1.addPass(LoopSimplifyCFGPass()); - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. @@ -469,11 +464,6 @@ LPM1.addPass(LoopInstSimplifyPass()); LPM1.addPass(LoopSimplifyCFGPass()); - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -457,10 +457,6 @@ MPM.add(createLoopInstSimplifyPass()); MPM.add(createLoopSimplifyCFGPass()); } - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -129,20 +129,20 @@ ; GCN-O1-NEXT: Simplify the CFG ; GCN-O1-NEXT: Reassociate expressions ; GCN-O1-NEXT: Dominator Tree Construction -; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-NEXT: Function Alias Analysis Results -; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Canonicalize natural loops ; GCN-O1-NEXT: LCSSA Verifier ; GCN-O1-NEXT: Loop-Closed SSA Form Pass +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Scalar Evolution Analysis +; GCN-O1-NEXT: Loop Pass Manager +; GCN-O1-NEXT: Rotate Loops +; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Lazy Branch Probability Analysis ; GCN-O1-NEXT: Lazy Block Frequency Analysis ; GCN-O1-NEXT: Loop Pass Manager ; GCN-O1-NEXT: Loop Invariant Code Motion -; GCN-O1-NEXT: Rotate Loops -; GCN-O1-NEXT: Loop Invariant Code Motion ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Legacy Divergence Analysis ; GCN-O1-NEXT: Loop Pass Manager @@ -456,20 +456,20 @@ ; GCN-O2-NEXT: Simplify the CFG ; GCN-O2-NEXT: Reassociate expressions ; GCN-O2-NEXT: Dominator Tree Construction -; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Loop Pass Manager +; GCN-O2-NEXT: Rotate Loops +; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Lazy Branch Probability Analysis ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Loop Pass Manager ; GCN-O2-NEXT: Loop Invariant Code Motion -; GCN-O2-NEXT: Rotate Loops -; GCN-O2-NEXT: Loop Invariant Code Motion ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Legacy Divergence Analysis ; GCN-O2-NEXT: Loop Pass Manager @@ -820,20 +820,20 @@ ; GCN-O3-NEXT: Simplify the CFG ; GCN-O3-NEXT: Reassociate expressions ; GCN-O3-NEXT: Dominator Tree Construction -; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Loop Pass Manager +; GCN-O3-NEXT: Rotate Loops +; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Lazy Branch Probability Analysis ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Loop Pass Manager ; GCN-O3-NEXT: Loop Invariant Code Motion -; GCN-O3-NEXT: Rotate Loops -; GCN-O3-NEXT: Loop Invariant Code Motion ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Legacy Divergence Analysis ; GCN-O3-NEXT: Loop Pass Manager diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -167,7 +167,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -133,7 +133,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -106,7 +106,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -115,7 +115,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -144,7 +144,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -110,7 +110,6 @@ ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass -; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -6,21 +6,21 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) { ; CHECK-A55-LABEL: @runtime_unroll_generic( ; CHECK-A55-NEXT: entry: +; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 +; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]] +; CHECK-A55: for.body6.lr.ph: ; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef -; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]] -; CHECK-A55: for.body6.preheader: ; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1 ; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3 ; CHECK-A55-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-A55-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW:%.*]] -; CHECK-A55: for.body6.preheader.new: +; CHECK-A55-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]] +; CHECK-A55: for.body6.lr.ph.new: ; CHECK-A55-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4 ; CHECK-A55-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-A55: for.body6: -; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ] +; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ] ; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-A55-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 @@ -93,13 +93,15 @@ ; ; CHECK-GENERIC-LABEL: @runtime_unroll_generic( ; CHECK-GENERIC-NEXT: entry: +; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]] +; CHECK-GENERIC: for.body6.lr.ph: ; CHECK-GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef -; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-GENERIC: for.body6: -; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY6]] ] ; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -88,10 +88,10 @@ define void @matrix_extract_insert_loop(i32 %i, [225 x double]* nonnull align 8 dereferenceable(1800) %A, [225 x double]* nonnull align 8 dereferenceable(1800) %B) { ; CHECK-LABEL: @matrix_extract_insert_loop( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [225 x double]* [[A:%.*]] to <225 x double>* -; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>* -; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0 ; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225 @@ -145,8 +145,8 @@ ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] ; CHECK: for.body4.us.2: ; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[NARROW16:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW16]] to i64 +; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 +; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW17]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]]) ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]] @@ -168,8 +168,8 @@ ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] ; CHECK: for.body4.us.3: ; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 -; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW17]] to i64 +; CHECK-NEXT: [[NARROW18:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW18]] to i64 ; CHECK-NEXT: [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]]) ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -44,15 +44,15 @@ ; OLDPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; OLDPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; OLDPM_O2-NEXT: entry: -; OLDPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; OLDPM_O2-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; OLDPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; OLDPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; OLDPM_O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; OLDPM_O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]] ; OLDPM_O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; OLDPM_O2: for.cond1.preheader: ; OLDPM_O2-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; OLDPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O2-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; OLDPM_O2: for.body4.preheader: ; OLDPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]] @@ -97,8 +97,9 @@ ; OLDPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; OLDPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; OLDPM_O3-NEXT: entry: -; OLDPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; OLDPM_O3-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; OLDPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; OLDPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O3-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; OLDPM_O3: for.cond1.preheader.us.preheader: ; OLDPM_O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 @@ -107,7 +108,6 @@ ; OLDPM_O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; OLDPM_O3: for.cond1.preheader.us: ; OLDPM_O3-NEXT: [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; OLDPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; OLDPM_O3: vector.body: ; OLDPM_O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] @@ -150,12 +150,12 @@ ; NEWPM_O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O1-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O1-NEXT: entry: -; NEWPM_O1-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O1-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O1-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; NEWPM_O1: for.cond1.preheader: ; NEWPM_O1-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; NEWPM_O1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O1-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]] ; NEWPM_O1: for.cond.cleanup: ; NEWPM_O1-NEXT: ret void @@ -176,15 +176,15 @@ ; NEWPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O2-NEXT: entry: -; NEWPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O2-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; NEWPM_O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; NEWPM_O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]] ; NEWPM_O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; NEWPM_O2: for.cond1.preheader: ; NEWPM_O2-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; NEWPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O2-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; NEWPM_O2: for.body4.preheader: ; NEWPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]] @@ -229,8 +229,9 @@ ; NEWPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O3-NEXT: entry: -; NEWPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O3-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O3-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; NEWPM_O3: for.cond1.preheader.us.preheader: ; NEWPM_O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 @@ -239,7 +240,6 @@ ; NEWPM_O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; NEWPM_O3: for.cond1.preheader.us: ; NEWPM_O3-NEXT: [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; NEWPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; NEWPM_O3: vector.body: ; NEWPM_O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -14,13 +14,15 @@ define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { ; OLDPM_O1-LABEL: @licm( ; OLDPM_O1-NEXT: entry: -; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O1: for.body.lr.ph: +; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O1: for.body: -; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; OLDPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; OLDPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -29,12 +31,12 @@ ; ; OLDPM_O23-LABEL: @licm( ; OLDPM_O23-NEXT: entry: -; OLDPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; OLDPM_O23: for.body.preheader: +; OLDPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O23: for.body.lr.ph: +; OLDPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; OLDPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; OLDPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; OLDPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; OLDPM_O23: vector.ph: ; OLDPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; OLDPM_O23-NEXT: br label [[VECTOR_BODY:%.*]] @@ -42,38 +44,40 @@ ; OLDPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OLDPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; OLDPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; OLDPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; OLDPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; OLDPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; OLDPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; OLDPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; OLDPM_O23: middle.block: ; OLDPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; OLDPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; OLDPM_O23: for.body.preheader3: -; OLDPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; OLDPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; OLDPM_O23: for.body.preheader: +; OLDPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; OLDPM_O23-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O23: for.body: -; OLDPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; OLDPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; OLDPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; OLDPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; OLDPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; OLDPM_O23: for.cond.cleanup: ; OLDPM_O23-NEXT: ret void ; ; NEWPM_O1-LABEL: @licm( ; NEWPM_O1-NEXT: entry: -; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; NEWPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; NEWPM_O1: for.body.lr.ph: +; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; NEWPM_O1: for.body: -; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; NEWPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; NEWPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -82,12 +86,12 @@ ; ; NEWPM_O23-LABEL: @licm( ; NEWPM_O23-NEXT: entry: -; NEWPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; NEWPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; NEWPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; NEWPM_O23: for.body.preheader: +; NEWPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; NEWPM_O23: for.body.lr.ph: +; NEWPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; NEWPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; NEWPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; NEWPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; NEWPM_O23: vector.ph: ; NEWPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; NEWPM_O23-NEXT: br label [[VECTOR_BODY:%.*]] @@ -95,26 +99,26 @@ ; NEWPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NEWPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; NEWPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; NEWPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; NEWPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; NEWPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NEWPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; NEWPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NEWPM_O23: middle.block: ; NEWPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; NEWPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; NEWPM_O23: for.body.preheader3: -; NEWPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; NEWPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; NEWPM_O23: for.body.preheader: +; NEWPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; NEWPM_O23-NEXT: br label [[FOR_BODY:%.*]] ; NEWPM_O23: for.body: -; NEWPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; NEWPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; NEWPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; NEWPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; NEWPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; NEWPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; NEWPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NEWPM_O23: for.cond.cleanup: ; NEWPM_O23-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -27,28 +27,32 @@ ; OLDPM_O23-NEXT: entry: ; OLDPM_O23-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; OLDPM_O23-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; OLDPM_O23-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; OLDPM_O23-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; OLDPM_O23-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; OLDPM_O23-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; OLDPM_O23-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; OLDPM_O23-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; OLDPM_O23-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; OLDPM_O23-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; OLDPM_O23: for.body7.lr.ph.i: ; OLDPM_O23-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; OLDPM_O23-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; OLDPM_O23-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; OLDPM_O23-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; OLDPM_O23-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] -; OLDPM_O23-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; OLDPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]] -; OLDPM_O23-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; OLDPM_O23-NEXT: [[DOTPRE_I:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; OLDPM_O23-NEXT: br label [[FOR_BODY7_I:%.*]] +; OLDPM_O23-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] +; OLDPM_O23-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; OLDPM_O23-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; OLDPM_O23-NEXT: [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; OLDPM_O23-NEXT: [[BASE_I6_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; OLDPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I6_PEEL_I]], align 8, !tbaa [[TBAA8]] +; OLDPM_O23-NEXT: [[ARRAYIDX_I7_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; OLDPM_O23-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; OLDPM_O23-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O23-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O23-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O23-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; OLDPM_O23-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] ; OLDPM_O23: for.body7.i: -; OLDPM_O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O23-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; OLDPM_O23-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] +; OLDPM_O23-NEXT: [[TMP7:%.*]] = phi float [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ], [ [[ADD_PEEL_I]], [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O23-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; OLDPM_O23-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I7_PEEL_I]], align 4, !tbaa [[TBAA9]] +; OLDPM_O23-NEXT: [[ADD_I]] = fadd float [[TMP7]], [[TMP8]] ; OLDPM_O23-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] -; OLDPM_O23-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; OLDPM_O23-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; OLDPM_O23-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; OLDPM_O23: _ZN12FloatVecPair6vecIncEv.exit: @@ -59,27 +63,32 @@ ; NEWPM_O1-NEXT: entry: ; NEWPM_O1-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; NEWPM_O1-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; NEWPM_O1-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; NEWPM_O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; NEWPM_O1-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; NEWPM_O1-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; NEWPM_O1-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; NEWPM_O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; NEWPM_O1-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; NEWPM_O1-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM_O1: for.body7.lr.ph.i: ; NEWPM_O1-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; NEWPM_O1-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; NEWPM_O1-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; NEWPM_O1-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; NEWPM_O1-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] -; NEWPM_O1-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; NEWPM_O1-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] -; NEWPM_O1-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; NEWPM_O1-NEXT: br label [[FOR_BODY7_I:%.*]] -; NEWPM_O1: for.body7.i: -; NEWPM_O1-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; NEWPM_O1-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; NEWPM_O1-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] +; NEWPM_O1-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; NEWPM_O1-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; NEWPM_O1-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; NEWPM_O1-NEXT: [[BASE_I4_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; NEWPM_O1-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I4_PEEL_I]], align 8, !tbaa [[TBAA8]] +; NEWPM_O1-NEXT: [[ARRAYIDX_I5_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; NEWPM_O1-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] ; NEWPM_O1-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O1-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O1-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; NEWPM_O1-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] +; NEWPM_O1: for.body7.i: +; NEWPM_O1-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; NEWPM_O1-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O1-NEXT: [[ADD_I:%.*]] = fadd float [[TMP7]], [[TMP8]] ; NEWPM_O1-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O1-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; NEWPM_O1-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; NEWPM_O1-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; NEWPM_O1: _ZN12FloatVecPair6vecIncEv.exit: @@ -90,28 +99,32 @@ ; NEWPM_O23-NEXT: entry: ; NEWPM_O23-NEXT: [[BASE_I_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[FVP]], i64 0, i32 1, i32 0 ; NEWPM_O23-NEXT: [[TMP0:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]] -; NEWPM_O23-NEXT: [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 -; NEWPM_O23-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]] -; NEWPM_O23-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 -; NEWPM_O23-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] +; NEWPM_O23-NEXT: [[SIZE410_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 1 +; NEWPM_O23-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE410_I]], align 8, !tbaa [[TBAA6:![0-9]+]] +; NEWPM_O23-NEXT: [[CMP511_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; NEWPM_O23-NEXT: br i1 [[CMP511_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM_O23: for.body7.lr.ph.i: ; NEWPM_O23-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 -; NEWPM_O23-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 -; NEWPM_O23-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] -; NEWPM_O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef -; NEWPM_O23-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] -; NEWPM_O23-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 -; NEWPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] -; NEWPM_O23-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef -; NEWPM_O23-NEXT: [[DOTPRE_I:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9:![0-9]+]] -; NEWPM_O23-NEXT: br label [[FOR_BODY7_I:%.*]] +; NEWPM_O23-NEXT: [[TMP2:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] +; NEWPM_O23-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP2]], i64 undef, i32 0 +; NEWPM_O23-NEXT: [[TMP3:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8:![0-9]+]] +; NEWPM_O23-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef +; NEWPM_O23-NEXT: [[BASE_I4_PEEL_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 +; NEWPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I4_PEEL_I]], align 8, !tbaa [[TBAA8]] +; NEWPM_O23-NEXT: [[ARRAYIDX_I5_PEEL_I:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef +; NEWPM_O23-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; NEWPM_O23-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O23-NEXT: [[ADD_PEEL_I:%.*]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O23-NEXT: store float [[ADD_PEEL_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O23-NEXT: [[EXITCOND_PEEL_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 1 +; NEWPM_O23-NEXT: br i1 [[EXITCOND_PEEL_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I:%.*]] ; NEWPM_O23: for.body7.i: -; NEWPM_O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] -; NEWPM_O23-NEXT: [[J_011_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; NEWPM_O23-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] +; NEWPM_O23-NEXT: [[TMP7:%.*]] = phi float [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ], [ [[ADD_PEEL_I]], [[FOR_BODY7_LR_PH_I]] ] +; NEWPM_O23-NEXT: [[J_012_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY7_I]] ], [ 1, [[FOR_BODY7_LR_PH_I]] ] +; NEWPM_O23-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_I5_PEEL_I]], align 4, !tbaa [[TBAA9]] +; NEWPM_O23-NEXT: [[ADD_I]] = fadd float [[TMP7]], [[TMP8]] ; NEWPM_O23-NEXT: store float [[ADD_I]], float* [[ARRAYIDX_I9_I]], align 4, !tbaa [[TBAA9]] -; NEWPM_O23-NEXT: [[INC_I]] = add nuw i32 [[J_011_I]], 1 +; NEWPM_O23-NEXT: [[INC_I]] = add nuw i32 [[J_012_I]], 1 ; NEWPM_O23-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[INC_I]], [[TMP1]] ; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT]], label [[FOR_BODY7_I]], !llvm.loop [[LOOP11:![0-9]+]] ; NEWPM_O23: _ZN12FloatVecPair6vecIncEv.exit: @@ -130,16 +143,18 @@ ; OLDPM_O1-SAME: (%class.FloatVecPair* [[THIS:%.*]]) local_unnamed_addr comdat align 2 { ; OLDPM_O1-NEXT: entry: ; OLDPM_O1-NEXT: [[VSRC23:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[THIS]], i64 0, i32 1 -; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 ; OLDPM_O1-NEXT: [[CALL2:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[SIZE43:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[CALL2]], i64 0, i32 1 ; OLDPM_O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE43]], align 8, !tbaa [[TBAA0:![0-9]+]] ; OLDPM_O1-NEXT: [[CMP54_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7_LR_PH:%.*]] +; OLDPM_O1: for.body7.lr.ph: +; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 +; OLDPM_O1-NEXT: br label [[FOR_BODY7:%.*]] ; OLDPM_O1: for.cond.cleanup6: ; OLDPM_O1-NEXT: ret void ; OLDPM_O1: for.body7: -; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY7]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY7]] ] ; OLDPM_O1-NEXT: [[CALL9:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[CALL10:%.*]] = call float* @_ZN14HomemadeVectorIfLj8EEixEj(%class.HomemadeVector.0* [[CALL9]]) ; OLDPM_O1-NEXT: [[TMP1:%.*]] = load float, float* [[CALL10]], align 4, !tbaa [[TBAA6:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -15,18 +15,18 @@ ; CHECK-LABEL: @vdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER18:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP7]], [[X]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER18]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 @@ -77,9 +77,9 @@ ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER18]] -; CHECK: for.body.preheader18: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP25]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3 @@ -101,17 +101,17 @@ ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.body.prol.loopexit: -; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER18]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] ; CHECK-NEXT: [[TMP29:%.*]] = icmp ult i64 [[TMP26]], 3 -; CHECK-NEXT: br i1 [[TMP29]], label [[FOR_END]], label [[FOR_BODY_PREHEADER18_NEW:%.*]] -; CHECK: for.body.preheader18.new: +; CHECK-NEXT: br i1 [[TMP29]], label [[FOR_END]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK: for.body.preheader.new: ; CHECK-NEXT: [[TMP30:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP31:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP32:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: [[TMP33:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER18_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0]], [[TMP30]] diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -77,19 +77,19 @@ ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: ; ROTATED_LATER_OLDPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; ROTATED_LATER_OLDPM: for.cond.cleanup: +; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_COND:%.*]] +; ROTATED_LATER_OLDPM: for.cond: +; ROTATED_LATER_OLDPM-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] ; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() +; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; ROTATED_LATER_OLDPM: for.cond.cleanup: ; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() ; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_OLDPM: for.body: -; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() -; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] -; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_0]], 1 +; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_COND]] ; ROTATED_LATER_OLDPM: return: ; ROTATED_LATER_OLDPM-NEXT: ret void ; @@ -99,19 +99,19 @@ ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; ROTATED_LATER_NEWPM: for.cond.cleanup: +; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_COND:%.*]] +; ROTATED_LATER_NEWPM: for.cond: +; ROTATED_LATER_NEWPM-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() +; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; ROTATED_LATER_NEWPM: for.cond.cleanup: ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: -; ROTATED_LATER_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() -; ROTATED_LATER_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] -; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; ROTATED_LATER_NEWPM-NEXT: [[INC]] = add nuw i32 [[I_0]], 1 +; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_COND]] ; ROTATED_LATER_NEWPM: return: ; ROTATED_LATER_NEWPM-NEXT: ret void ;