diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -133,7 +133,8 @@ // Pass *createLICMPass(); Pass *createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool SpeculativelyHoistOnInformationLoss); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h --- a/llvm/include/llvm/Transforms/Scalar/LICM.h +++ b/llvm/include/llvm/Transforms/Scalar/LICM.h @@ -46,14 +46,17 @@ class LICMPass : public PassInfoMixin { unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmSpeculate = true; public: LICMPass() : LicmMssaOptCap(SetLicmMssaOptCap), LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) + LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, + bool LicmSpeculate) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmSpeculate(LicmSpeculate) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; @@ -62,14 +65,17 @@ class LNICMPass : public PassInfoMixin { unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmSpeculate = true; public: LNICMPass() : LicmMssaOptCap(SetLicmMssaOptCap), LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} - LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) + LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, + bool LicmSpeculate) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + 
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmSpeculate(LicmSpeculate) {} PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -171,10 +171,13 @@ /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p Speculate is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop. bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, + bool Speculate); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -204,12 +207,14 @@ /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p Speculate is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop. bool promoteLoopAccessesToScalars( const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *); + OptimizationRemarkEmitter *, bool Speculate); /// Does a BFS from a given node to all of its children inside a given loop. 
/// The returned vector of nodes includes the starting point. diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -293,12 +293,14 @@ // Try to remove as much code from the loop header as possible, // to reduce amount of IR that will have to be duplicated. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/false)); LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true)); LPM1.addPass(SimpleLoopUnswitchPass()); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); @@ -465,13 +467,15 @@ // Try to remove as much code from the loop header as possible, // to reduce amount of IR that will have to be duplicated. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/false)); // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true)); LPM1.addPass( SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && EnableO3NonTrivialUnswitching)); @@ -567,7 +571,8 @@ FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(CoroElidePass()); @@ -1007,7 +1012,8 @@ ExtraPasses.addPass(CorrelatedValuePropagationPass()); ExtraPasses.addPass(InstCombinePass()); LoopPassManager LPM; - LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true)); LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( @@ -1073,7 +1079,8 @@ FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } @@ -1612,7 +1619,8 @@ FunctionPassManager MainFPM; MainFPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*speculate=*/true), /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); if (RunNewGVN) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -453,11 +453,13 @@ // Try to remove as much code from the loop header as 
possible, // to reduce amount of IR that will have to be duplicated. // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else @@ -521,7 +523,8 @@ // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -580,7 +583,8 @@ PM.add(createEarlyCSEPass()); PM.add(createCorrelatedValuePropagationPass()); PM.add(createInstructionCombiningPass()); - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); PM.add(createCFGSimplificationPass()); PM.add(createInstructionCombiningPass()); @@ -641,7 +645,8 @@ // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. 
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); } PM.add(createWarnMissedTransformationsPass()); @@ -886,7 +891,8 @@ // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -1120,7 +1126,8 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*Speculate=*/true)); PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. 
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -149,13 +149,10 @@ BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE); -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI = nullptr); +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, bool Speculate); static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); @@ -188,21 +185,26 @@ OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) + unsigned LicmMssaNoAccForPromotionCap, + bool LicmSpeculate) : LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmSpeculate(LicmSpeculate) {} private: unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmSpeculate; }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass( unsigned LicmMssaOptCap = SetLicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) - : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { + unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, + bool LicmSpeculate = true) + : LoopPass(ID), + LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, LicmSpeculate) { 
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } @@ -265,7 +267,8 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmSpeculate); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE)) return PreservedAnalyses::all(); @@ -290,7 +293,8 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(LN.getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmSpeculate); Loop &OutermostLoop = LN.getOutermostLoop(); bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI, @@ -321,8 +325,10 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) { - return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool LicmSpeculate) { + return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmSpeculate); } llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, @@ -418,7 +424,8 @@ Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); + &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, + LicmSpeculate); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. 
@@ -460,8 +467,8 @@ for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmSpeculate); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -825,7 +832,8 @@ MemorySSAUpdater *MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE, bool LoopNestMode) { + OptimizationRemarkEmitter *ORE, bool LoopNestMode, + bool Speculate) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && @@ -877,7 +885,7 @@ true, &Flags, ORE) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, - CurLoop->getLoopPreheader()->getTerminator())) { + CurLoop->getLoopPreheader()->getTerminator(), Speculate)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(&I); @@ -1774,14 +1782,11 @@ /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. 
-static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI) { - if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, bool Speculate) { + if (Speculate && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) return true; bool GuaranteedToExecute = @@ -1949,7 +1954,7 @@ SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, bool Speculate) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2054,9 +2059,9 @@ // to execute does as well. Thus we can increase our guaranteed // alignment as well. 
if (!DereferenceableInPH || (InstAlignment > Alignment)) - if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, - SafetyInfo, ORE, - Preheader->getTerminator())) { + if (isSafeToExecuteUnconditionally( + *Load, DT, TLI, CurLoop, SafetyInfo, ORE, + Preheader->getTerminator(), Speculate)) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); } diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -21,10 +21,10 @@ ; OLDPM-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 ; OLDPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]] ; OLDPM: for.body7.lr.ph.i: -; OLDPM-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; OLDPM-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 ; OLDPM-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; OLDPM-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef +; OLDPM-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; OLDPM-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]] ; OLDPM-NEXT: [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 ; OLDPM-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]] @@ -52,10 +52,10 @@ ; NEWPM-NEXT: [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0 ; NEWPM-NEXT: br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label 
[[FOR_BODY7_LR_PH_I:%.*]] ; NEWPM: for.body7.lr.ph.i: -; NEWPM-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM-NEXT: [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0 ; NEWPM-NEXT: [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]] ; NEWPM-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef +; NEWPM-NEXT: [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0 ; NEWPM-NEXT: [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]] ; NEWPM-NEXT: [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0 ; NEWPM-NEXT: [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]] diff --git a/llvm/test/Transforms/PhaseOrdering/speclicm.ll b/llvm/test/Transforms/PhaseOrdering/speclicm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/speclicm.ll @@ -0,0 +1,52 @@ +; RUN: opt -O1 -S < %s | FileCheck %s +; RUN: opt -O1 -S -enable-new-pm=0 < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress nofree norecurse nosync nounwind uwtable +define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %k.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %k.0, %numElem + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %for.cond + %0 = load double*, double** %_M_start.i, align 8, !tbaa !3 + %add.ptr.i = getelementptr inbounds double, double* %0, i64 %k.0 + store double 
2.000000e+00, double* %add.ptr.i, align 8, !tbaa !8 + %inc = add nuw i64 %k.0, 1 + br label %for.cond + +for.cond.cleanup: ; preds = %for.cond + ret void +} + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} +!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git fc510998f7c287df2bc1304673e0cd8452d50b31)"} +!3 = !{!4, !5, i64 0} +!4 = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", !5, i64 0, !5, i64 8, !5, i64 16} +!5 = !{!"any pointer", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C++ TBAA"} +!8 = !{!9, !9, i64 0} +!9 = !{!"double", !6, i64 0} + +; The LICM prior to Loop-Rotate should not speculate instructions +; lest it lose information it could have kept when running after +; Loop-Rotate + +; CHECK: define void @licm(double** nocapture readonly align 8 dereferenceable(8) %_M_start.i, i64 %numElem) +; CHECK-NEXT: entry: +; CHECK-NEXT: %cmp1.not = icmp eq i64 %numElem, 0 +; CHECK-NEXT: br i1 %cmp1.not, label %for.cond.cleanup, label %for.body.lr.ph + +; CHECK: for.body.lr.ph: ; preds = %entry +; CHECK-NEXT: %{{.*}} = load double*, double** %_M_start.i, align 8, !tbaa !{{[0-9]+}} +; CHECK-NEXT: br label %for.body