Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -311,6 +311,13 @@ using BuilderTy = IRBuilder; BuilderTy &Builder; + enum Change { + // Fully ordered: std::max(Change1, Change2) results in the biggest change. + NoChange, + PreservedCFG, + ChangedCFG + }; + private: // Mode in which we are running the combiner. const bool MinimizeSize; @@ -331,7 +338,7 @@ // combining and will be updated to reflect any changes. LoopInfo *LI; - bool MadeIRChange = false; + Change MadeIRChange = Change::NoChange; public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, @@ -344,9 +351,7 @@ DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} /// Run the combiner over the entire worklist until it is empty. - /// - /// \returns true if the IR is changed. - bool run(); + Change run(); AssumptionCache &getAssumptionCache() const { return AC; } @@ -731,7 +736,8 @@ Worklist.remove(&I); I.eraseFromParent(); - MadeIRChange = true; + MadeIRChange = std::max(MadeIRChange, Change::PreservedCFG); + return nullptr; // Don't do anything with FI } Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2720,6 +2720,7 @@ !isa(X)) { // Swap Destinations and condition... BI.swapSuccessors(); + MadeIRChange = std::max(MadeIRChange, Change::ChangedCFG); return replaceOperand(BI, 0, X); } @@ -2739,6 +2740,7 @@ CmpInst *Cond = cast(BI.getCondition()); Cond->setPredicate(CmpInst::getInversePredicate(Pred)); BI.swapSuccessors(); + MadeIRChange = std::max(MadeIRChange, Change::ChangedCFG); Worklist.push(Cond); return &BI; } @@ -3381,7 +3383,7 @@ return true; } -bool InstCombiner::run() { +InstCombiner::Change InstCombiner::run() { while (!Worklist.isEmpty()) { // Walk deferred instructions in reverse order, and push them to the // worklist, which means they'll end up popped from the worklist in-order. @@ -3424,7 +3426,7 @@ ++NumConstProp; if (isInstructionTriviallyDead(I, &TLI)) eraseInstFromFunction(*I); - MadeIRChange = true; + MadeIRChange = std::max(MadeIRChange, Change::PreservedCFG); continue; } } @@ -3463,7 +3465,7 @@ // Okay, the CFG is simple enough, try to sink this instruction. if (TryToSinkInstruction(I, UserParent)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); - MadeIRChange = true; + MadeIRChange = std::max(MadeIRChange, Change::PreservedCFG); // We'll add uses of the sunk instruction below, but since sinking // can expose opportunities for it's *operands* add them to the // worklist @@ -3529,7 +3531,7 @@ Worklist.push(I); } } - MadeIRChange = true; + MadeIRChange = std::max(MadeIRChange, Change::PreservedCFG); } } @@ -3662,7 +3664,7 @@ return MadeIRChange; } -static bool combineInstructionsOverFunction( +static InstCombiner::Change combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, @@ -3682,9 +3684,10 @@ // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. - bool MadeIRChange = false; + InstCombiner::Change MadeIRChange = InstCombiner::Change::NoChange; if (ShouldLowerDbgDeclare) - MadeIRChange = LowerDbgDeclare(F); + if (LowerDbgDeclare(F)) + MadeIRChange = InstCombiner::Change::PreservedCFG; // Iterate while there is work to do. unsigned Iteration = 0; @@ -3707,16 +3710,18 @@ LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); + if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist)) + MadeIRChange = std::max(MadeIRChange, InstCombiner::Change::PreservedCFG); InstCombiner IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; - if (!IC.run()) + InstCombiner::Change Changed = IC.run(); + if (Changed == InstCombiner::Change::NoChange) break; - MadeIRChange = true; + MadeIRChange = std::max(MadeIRChange, Changed); } return MadeIRChange; @@ -3743,14 +3748,16 @@ auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult(F) : nullptr; - if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI, - PSI, MaxIterations, LI)) + InstCombiner::Change Changed = combineInstructionsOverFunction( + F, Worklist, AA, AC, TLI, DT, ORE, BFI, PSI, MaxIterations, LI); + if (Changed == InstCombiner::Change::NoChange) // No changes, all analyses are preserved. return PreservedAnalyses::all(); // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; - PA.preserveSet(); + if (Changed <= InstCombiner::Change::PreservedCFG) + PA.preserveSet(); PA.preserve(); PA.preserve(); PA.preserve(); @@ -3758,13 +3765,11 @@ } void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); Index: llvm/test/Other/opt-O2-pipeline.ll =================================================================== --- llvm/test/Other/opt-O2-pipeline.ll +++ llvm/test/Other/opt-O2-pipeline.ll @@ -83,6 +83,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Conditionally eliminate dead library calls +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis @@ -123,6 +124,8 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -153,6 +156,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation @@ -255,6 +259,8 @@ ; CHECK-NEXT: Optimize scalar/vector ops ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -265,7 +271,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass Index: llvm/test/Other/opt-O3-pipeline.ll =================================================================== --- llvm/test/Other/opt-O3-pipeline.ll +++ llvm/test/Other/opt-O3-pipeline.ll @@ -88,6 +88,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Conditionally eliminate dead library calls +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis @@ -128,6 +129,8 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -158,6 +161,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation @@ -260,6 +264,8 @@ ; CHECK-NEXT: Optimize scalar/vector ops ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -270,7 +276,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass Index: llvm/test/Other/opt-Os-pipeline.ll =================================================================== --- llvm/test/Other/opt-Os-pipeline.ll +++ llvm/test/Other/opt-Os-pipeline.ll @@ -82,6 +82,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Tail Call Elimination ; CHECK-NEXT: Simplify the CFG @@ -109,6 +113,8 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -139,6 +145,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation @@ -241,6 +248,8 @@ ; CHECK-NEXT: Optimize scalar/vector ops ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass @@ -251,7 +260,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass Index: llvm/test/Other/optimization-remarks-invalidation.ll =================================================================== --- llvm/test/Other/optimization-remarks-invalidation.ll +++ llvm/test/Other/optimization-remarks-invalidation.ll @@ -10,7 +10,7 @@ ; ; Check that passes which preserve BFI don't invalidate the emitter. ; RUN: opt %s -disable-output -aa-pipeline=basic-aa 2>&1 \ -; RUN: -passes='require,instcombine,require,loop(licm)' -debug-pass-manager \ +; RUN: -passes='require,instsimplify,require,loop(licm)' -debug-pass-manager \ ; RUN: -pass-remarks=licm -pass-remarks-with-hotness \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PM-PRESERVE ; @@ -31,7 +31,7 @@ define void @hoist(i32* %array, i32* noalias %p) { ; CHECK-PM-PRESERVE: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-PM-PRESERVE: Running pass: InstCombinePass +; CHECK-PM-PRESERVE: Running pass: InstSimplifyPass ; CHECK-PM-PRESERVE-NOT: Invalidating analysis: OptimizationRemarkEmitterAnalysis ; CHECK-PM-PRESERVE-NOT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-PM-PRESERVE: Running pass: LICMPass @@ -51,7 +51,8 @@ Loop: %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] - %addr = getelementptr i32, i32* %array, i32 %j + %arr = getelementptr i32, i32* %array, i32 0 + %addr = getelementptr i32, i32* %arr, i32 %j %a = load i32, i32* %addr ; CHECK: remark: /tmp/kk.c:2:20: hoisting load %b = load i32, i32* %p, !dbg !8 Index: llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -postdomtree -analyze | FileCheck %s +; RUN: opt < %s -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info + +; Show that Predicate Canonicalization (InstCombine) invalidates PostDomTree +; if the basic block is post-dom unreachable. +; +define void @test1(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp uge i24 %a, %b + br i1 %f, label %B1, label %B2 + +B1: + br label %B2 + +B2: + br label %LOOP +} + +; The same as @test1 except the LOOP condition canonicalized (as by instcombine). +define void @test2(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp ult i24 %a, %b + br i1 %f, label %B2, label %B1 + +B1: + br label %B2 + +B2: + br label %LOOP +} + +; PostDomTrees are different. + +; CHECK-LABEL: test1 +; CHECK-NEXT: =============================-------------------------------- +; CHECK-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-NEXT: [1] <> +; CHECK-NEXT: [2] %B2 +; CHECK-NEXT: [3] %LOOP +; CHECK-NEXT: [4] %entry +; CHECK-NEXT: [3] %B1 +; CHECK-NEXT: Roots: %B2 + +; CHECK-LABEL: test2 +; CHECK-NEXT: =============================-------------------------------- +; CHECK-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-NEXT: [1] <> +; CHECK-NEXT: [2] %B1 +; CHECK-NEXT: [3] %LOOP +; CHECK-NEXT: [4] %entry +; CHECK-NEXT: [4] %B2 +; CHECK-NEXT: Roots: %B1