diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -133,7 +133,8 @@ // Pass *createLICMPass(); Pass *createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool AllowSpeculation); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h --- a/llvm/include/llvm/Transforms/Scalar/LICM.h +++ b/llvm/include/llvm/Transforms/Scalar/LICM.h @@ -46,14 +46,17 @@ class LICMPass : public PassInfoMixin { unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmAllowSpeculation; public: LICMPass() : LicmMssaOptCap(SetLicmMssaOptCap), LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) + LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmAllowSpeculation(LicmAllowSpeculation) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; @@ -62,14 +65,17 @@ class LNICMPass : public PassInfoMixin { unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmAllowSpeculation; public: LNICMPass() : LicmMssaOptCap(SetLicmMssaOptCap), LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) + LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmAllowSpeculation(LicmAllowSpeculation) {} PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -171,10 +171,13 @@ /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p AllowSpeculation is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop, but are safe to speculatively execute. bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, + bool AllowSpeculation); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -204,12 +207,14 @@ /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p AllowSpeculation is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop. bool promoteLoopAccessesToScalars( const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *); + OptimizationRemarkEmitter *, bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -293,14 +293,19 @@ LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass(SimpleLoopUnswitchPass()); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); @@ -470,15 +475,20 @@ LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass( SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && EnableO3NonTrivialUnswitching)); @@ -575,7 +585,8 @@ FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(CoroElidePass()); @@ -1019,7 +1030,8 @@ ExtraPasses.addPass(CorrelatedValuePropagationPass()); ExtraPasses.addPass(InstCombinePass()); LoopPassManager LPM; - LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( @@ -1087,7 +1099,8 @@ FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } @@ -1627,7 +1640,8 @@ FunctionPassManager MainFPM; MainFPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); if (RunNewGVN) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -458,13 +458,18 @@ MPM.add(createLoopSimplifyCFGPass()); } // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else @@ -529,7 +534,8 @@ // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -588,7 +594,8 @@ PM.add(createEarlyCSEPass()); PM.add(createCorrelatedValuePropagationPass()); PM.add(createInstructionCombiningPass()); - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); PM.add(createCFGSimplificationPass( SimplifyCFGOptions().convertSwitchRangeToICmp(true))); @@ -651,7 +658,8 @@ // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } PM.add(createWarnMissedTransformationsPass()); @@ -898,7 +906,8 @@ // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -1133,7 +1142,8 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -149,13 +149,11 @@ BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE); -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI = nullptr); +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation); static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); @@ -188,21 +186,26 @@ OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmAllowSpeculation(LicmAllowSpeculation) {} private: unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmAllowSpeculation; }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass( unsigned LicmMssaOptCap = SetLicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) - : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { + unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation = true) + : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } @@ -265,7 +268,8 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE)) return PreservedAnalyses::all(); @@ -290,7 +294,8 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(LN.getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); Loop &OutermostLoop = LN.getOutermostLoop(); bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI, @@ -321,8 +326,10 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) { - return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) { + return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); } llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, @@ -418,7 +425,8 @@ Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); + &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, + LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -460,8 +468,8 @@ for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -825,7 +833,8 @@ MemorySSAUpdater *MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE, bool LoopNestMode) { + OptimizationRemarkEmitter *ORE, bool LoopNestMode, + bool AllowSpeculation) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && @@ -877,7 +886,7 @@ true, &Flags, ORE) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, - CurLoop->getLoopPreheader()->getTerminator())) { + CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(&I); @@ -1774,14 +1783,12 @@ /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI) { - if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation) { + if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) return true; bool GuaranteedToExecute = @@ -1949,7 +1956,7 @@ SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2054,9 +2061,9 @@ // to execute does as well. Thus we can increase our guaranteed // alignment as well. if (!DereferenceableInPH || (InstAlignment > Alignment)) - if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, - SafetyInfo, ORE, - Preheader->getTerminator())) { + if (isSafeToExecuteUnconditionally( + *Load, DT, TLI, CurLoop, SafetyInfo, ORE, + Preheader->getTerminator(), AllowSpeculation)) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); } diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -44,15 +44,15 @@ ; OLDPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; OLDPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; OLDPM_O2-NEXT: entry: -; OLDPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; OLDPM_O2-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; OLDPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; OLDPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; OLDPM_O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; OLDPM_O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]] ; OLDPM_O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; OLDPM_O2: for.cond1.preheader: ; OLDPM_O2-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; OLDPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O2-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; OLDPM_O2: for.body4.preheader: ; OLDPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]] @@ -97,8 +97,9 @@ ; OLDPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; OLDPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; OLDPM_O3-NEXT: entry: -; OLDPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; OLDPM_O3-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; OLDPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; OLDPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O3-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; OLDPM_O3: for.cond1.preheader.us.preheader: ; OLDPM_O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 @@ -107,7 +108,6 @@ ; OLDPM_O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; OLDPM_O3: for.cond1.preheader.us: ; OLDPM_O3-NEXT: [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; OLDPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; OLDPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; OLDPM_O3: vector.body: ; OLDPM_O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] @@ -150,12 +150,12 @@ ; NEWPM_O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O1-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O1-NEXT: entry: -; NEWPM_O1-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O1-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O1-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; NEWPM_O1: for.cond1.preheader: ; NEWPM_O1-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; NEWPM_O1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O1-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]] ; NEWPM_O1: for.cond.cleanup: ; NEWPM_O1-NEXT: ret void @@ -176,15 +176,15 @@ ; NEWPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O2-NEXT: entry: -; NEWPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O2-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O2-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; NEWPM_O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; NEWPM_O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]] ; NEWPM_O2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; NEWPM_O2: for.cond1.preheader: ; NEWPM_O2-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; NEWPM_O2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O2-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; NEWPM_O2: for.body4.preheader: ; NEWPM_O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]] @@ -229,8 +229,9 @@ ; NEWPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; NEWPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; NEWPM_O3-NEXT: entry: -; NEWPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 ; NEWPM_O3-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 +; NEWPM_O3-NEXT: [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0 +; NEWPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O3-NEXT: br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; NEWPM_O3: for.cond1.preheader.us.preheader: ; NEWPM_O3-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 @@ -239,7 +240,6 @@ ; NEWPM_O3-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; NEWPM_O3: for.cond1.preheader.us: ; NEWPM_O3-NEXT: [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; NEWPM_O3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8 ; NEWPM_O3-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; NEWPM_O3: vector.body: ; NEWPM_O3-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -14,13 +14,15 @@ define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { ; OLDPM_O1-LABEL: @licm( ; OLDPM_O1-NEXT: entry: -; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O1: for.body.lr.ph: +; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O1: for.body: -; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; OLDPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; OLDPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -29,12 +31,12 @@ ; ; OLDPM_O23-LABEL: @licm( ; OLDPM_O23-NEXT: entry: -; OLDPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; OLDPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; OLDPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; OLDPM_O23: for.body.preheader: +; OLDPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; OLDPM_O23: for.body.lr.ph: +; OLDPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; OLDPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; OLDPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; OLDPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; OLDPM_O23: vector.ph: ; OLDPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; OLDPM_O23-NEXT: br label [[VECTOR_BODY:%.*]] @@ -42,38 +44,40 @@ ; OLDPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OLDPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; OLDPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; OLDPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; OLDPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; OLDPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; OLDPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; OLDPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; OLDPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; OLDPM_O23: middle.block: ; OLDPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; OLDPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; OLDPM_O23: for.body.preheader3: -; OLDPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; OLDPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; OLDPM_O23: for.body.preheader: +; OLDPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; OLDPM_O23-NEXT: br label [[FOR_BODY:%.*]] ; OLDPM_O23: for.body: -; OLDPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; OLDPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; OLDPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; OLDPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; OLDPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; OLDPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; OLDPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; OLDPM_O23: for.cond.cleanup: ; OLDPM_O23-NEXT: ret void ; ; NEWPM_O1-LABEL: @licm( ; NEWPM_O1-NEXT: entry: -; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; NEWPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; NEWPM_O1: for.body.lr.ph: +; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: br label [[FOR_BODY:%.*]] ; NEWPM_O1: for.body: -; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; NEWPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; NEWPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] ; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -82,12 +86,12 @@ ; ; NEWPM_O23-LABEL: @licm( ; NEWPM_O23-NEXT: entry: -; NEWPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8 ; NEWPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0 -; NEWPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; NEWPM_O23: for.body.preheader: +; NEWPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; NEWPM_O23: for.body.lr.ph: +; NEWPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]] ; NEWPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4 -; NEWPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]] +; NEWPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] ; NEWPM_O23: vector.ph: ; NEWPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4 ; NEWPM_O23-NEXT: br label [[VECTOR_BODY:%.*]] @@ -95,26 +99,26 @@ ; NEWPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NEWPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]] ; NEWPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* -; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]] +; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 ; NEWPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* -; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]] +; NEWPM_O23-NEXT: store <2 x double> , <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]] ; NEWPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; NEWPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NEWPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; NEWPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NEWPM_O23: middle.block: ; NEWPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]] -; NEWPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]] -; NEWPM_O23: for.body.preheader3: -; NEWPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; NEWPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]] +; NEWPM_O23: for.body.preheader: +; NEWPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; NEWPM_O23-NEXT: br label [[FOR_BODY:%.*]] ; NEWPM_O23: for.body: -; NEWPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ] +; NEWPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ] ; NEWPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]] -; NEWPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]] +; NEWPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]] ; NEWPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1 ; NEWPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]] -; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NEWPM_O23: for.cond.cleanup: ; NEWPM_O23-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -32,10 +32,10 @@ ; OLDPM_O23-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 ; OLDPM_O23-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; OLDPM_O23: for.body7.lr.ph.i: -; OLDPM_O23-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; OLDPM_O23-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 ; OLDPM_O23-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM_O23-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef +; OLDPM_O23-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; OLDPM_O23-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] ; OLDPM_O23-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 ; OLDPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]] @@ -64,10 +64,10 @@ ; NEWPM_O1-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 ; NEWPM_O1-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM_O1: for.body7.lr.ph.i: -; NEWPM_O1-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM_O1-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 ; NEWPM_O1-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O1-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef +; NEWPM_O1-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM_O1-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] ; NEWPM_O1-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 ; NEWPM_O1-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] @@ -95,10 +95,10 @@ ; NEWPM_O23-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 ; NEWPM_O23-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM_O23: for.body7.lr.ph.i: -; NEWPM_O23-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM_O23-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 ; NEWPM_O23-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM_O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef +; NEWPM_O23-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM_O23-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] ; NEWPM_O23-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 ; NEWPM_O23-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] @@ -130,16 +130,18 @@ ; OLDPM_O1-SAME: (%class.FloatVecPair* [[THIS:%.*]]) local_unnamed_addr comdat align 2 { ; OLDPM_O1-NEXT: entry: ; OLDPM_O1-NEXT: [[VSRC23:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[THIS]], i64 0, i32 1 -; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 ; OLDPM_O1-NEXT: [[CALL2:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[SIZE43:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[CALL2]], i64 0, i32 1 ; OLDPM_O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE43]], align 8, !tbaa [[TBAA0:![0-9]+]] ; OLDPM_O1-NEXT: [[CMP54_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 -; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7:%.*]] +; OLDPM_O1-NEXT: br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7_LR_PH:%.*]] +; OLDPM_O1: for.body7.lr.ph: +; OLDPM_O1-NEXT: [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0 +; OLDPM_O1-NEXT: br label [[FOR_BODY7:%.*]] ; OLDPM_O1: for.cond.cleanup6: ; OLDPM_O1-NEXT: ret void ; OLDPM_O1: for.body7: -; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY7]] ], [ 0, [[ENTRY:%.*]] ] +; OLDPM_O1-NEXT: [[J_05:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY7]] ] ; OLDPM_O1-NEXT: [[CALL9:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]]) ; OLDPM_O1-NEXT: [[CALL10:%.*]] = call float* @_ZN14HomemadeVectorIfLj8EEixEj(%class.HomemadeVector.0* [[CALL9]]) ; OLDPM_O1-NEXT: [[TMP1:%.*]] = load float, float* [[CALL10]], align 4, !tbaa [[TBAA6:![0-9]+]]