Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -20,6 +20,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" +#include "llvm/ADT/SmallPtrSet.h" namespace llvm { @@ -137,7 +138,8 @@ /// the basic block that was pointed to. /// bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC = nullptr); + unsigned BonusInstThreshold, AssumptionCache *AC = nullptr, + SmallPtrSet * LoopHeaders = nullptr); /// FlatternCFG - This function is used to flatten a CFG. For /// example, it uses parallel-and and parallel-or mode to collapse Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -201,10 +201,14 @@ // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. + // We should not eliminate the loop header either, because eliminating + // a loop header might later prevent LoopSimplify from transforming nested + // loops into simplifed form. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator()) { + BB->getFirstNonPHIOrDbg()->isTerminator() && + !LoopHeaders.count(BB)) { // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the // block, we have to make sure it isn't in the LoopHeaders set. We // reinsert afterward if needed. Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -131,12 +132,19 @@ unsigned BonusInstThreshold) { bool Changed = false; bool LocalChange = true; + + SmallVector, 32> Edges; + FindFunctionBackedges(F, Edges); + SmallPtrSet LoopHeaders; + for (unsigned i = 0, e = Edges.size(); i != e; ++i) + LoopHeaders.insert(const_cast(Edges[i].second)); + while (LocalChange) { LocalChange = false; // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) { + if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) { LocalChange = true; ++NumSimpl; } Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -113,6 +113,7 @@ const DataLayout &DL; unsigned BonusInstThreshold; AssumptionCache *AC; + SmallPtrSet * LoopHeaders; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector &Cases); @@ -133,8 +134,10 @@ public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, AssumptionCache *AC) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSet * LoopHeaders) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), + AC(AC), LoopHeaders(LoopHeaders) {} bool run(BasicBlock *BB); }; } @@ -2949,6 +2952,7 @@ // The landingpad is now unreachable. Zap it. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); return true; } @@ -3147,9 +3151,11 @@ } // If we eliminated all predecessors of the block, delete the block now. - if (pred_empty(BB)) + if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); + } return true; } @@ -3275,6 +3281,7 @@ BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) LoopHeaders->erase(BB); return true; } @@ -4612,8 +4619,14 @@ return true; // If the Terminator is the only non-phi instruction, simplify the block. + // if LoopHeader is provided, check if the block is a loop header + // (This is for early invocations before loop simplify and vectorization + // to keep canonical loop forms for loops with volatile induction variables. + // These blocks can be eliminated when the pass is invoked later + // in the back-end.) BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + (!LoopHeaders || (LoopHeaders && !LoopHeaders->count(BB))) && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -4875,7 +4888,8 @@ /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC) { + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSet * LoopHeaders) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC).run(BB); + BonusInstThreshold, AC, LoopHeaders).run(BB); } Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,23 +16,23 @@ %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 if.then: ; preds = %for.body -; CHECK: if.then.us: +; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 ; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b -; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us +; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us %add = add nsw i32 %add.i, 123 br label %for.inc if.else: ; preds = %for.body %mul = mul nsw i32 %mul.i, %b br label %for.inc -; CHECK: if.else: +; CHECK: for.body: ; CHECK: %mul = mul nsw i32 %mul.i, %b ; CHECK: %inc = add nuw nsw i32 %inc.i, 1 ; CHECK: %exitcond = icmp eq i32 %inc, %b -; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else +; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body for.inc: ; preds = %if.then, %if.else %mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ] %add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ] Index: test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- test/Transforms/LoopUnswitch/infinite-loop.ll +++ test/Transforms/LoopUnswitch/infinite-loop.ll @@ -16,10 +16,10 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split +; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split -; CHECK: cond.end: -; CHECK-NEXT: br label %cond.end +; CHECK: for.body: +; CHECK-NEXT: br label %for.body ; CHECK: abort0.split: ; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]] Index: test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll =================================================================== --- test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll +++ test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -simplifycfg -S > %t ; RUN: not grep "^BB.tomerge" %t -; RUN: grep "^BB.nomerge" %t | count 2 +; RUN: grep "^BB.nomerge" %t | count 4 ; ModuleID = '' declare i1 @foo() @@ -54,24 +54,24 @@ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @c() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry br label %Succ Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit ; This phi has identical values for Common and (through BB) Common, ; blocks can't be merged - %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] + %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Pre-Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Pre-Exit: ; preds = %Succ ; This adds a backedge, so the %b phi node gets a third branch and is @@ -83,25 +83,25 @@ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @d() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry ; This phi has a matching value (0) with below phi (0), so blocks ; can be merged. %a = phi i32 [ 1, %entry ], [ 0, %Common ] ; [#uses=1] br label %Succ Succ: ; preds = %Common, %BB.tomerge - %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; [#uses=0] + %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; [#uses=0] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Exit: ; preds = %Succ ret void @@ -110,21 +110,21 @@ ; This function can be merged define void @e() { entry: - br label %BB.tomerge + br label %Succ -BB.tomerge: ; preds = %Use, %entry +Succ: ; preds = %Use, %entry ; This phi is used somewhere else than Succ, but this should not prevent ; merging this block %a = phi i32 [ 1, %entry ], [ 0, %Use ] ; [#uses=1] - br label %Succ + br label %BB.tomerge -Succ: ; preds = %BB.tomerge +BB.tomerge: ; preds = %BB.tomerge %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Use, label %Exit Use: ; preds = %Succ %cond = call i1 @bar( i32 %a ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Exit + br i1 %cond, label %Succ, label %Exit Exit: ; preds = %Use, %Succ ret void Index: test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll =================================================================== --- test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll +++ test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll @@ -5,7 +5,7 @@ ; RUN: not grep X: %t ; RUN: not grep 'switch i32[^U]+%U' %t ; RUN: not grep "^BB.tomerge" %t -; RUN: grep "^BB.nomerge" %t | count 2 +; RUN: grep "^BB.nomerge" %t | count 4 ; ; ModuleID = '' @@ -179,24 +179,24 @@ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @c() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry br label %Succ Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit ; This phi has identical values for Common and (through BB) Common, ; blocks can't be merged - %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] + %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Pre-Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Pre-Exit: ; preds = %Succ ; This adds a backedge, so the %b phi node gets a third branch and is @@ -208,25 +208,25 @@ ret void } -; This function can be merged +; This function can't be merged (for keeping canonical loop structures) define void @d() { entry: - br label %BB.tomerge + br label %BB.nomerge -BB.tomerge: ; preds = %Common, %entry +BB.nomerge: ; preds = %Common, %entry ; This phi has a matching value (0) with below phi (0), so blocks ; can be merged. %a = phi i32 [ 1, %entry ], [ 0, %Common ] ; [#uses=1] br label %Succ Succ: ; preds = %Common, %BB.tomerge - %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; [#uses=0] + %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; [#uses=0] %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Common, label %Exit Common: ; preds = %Succ %cond = call i1 @foo( ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Succ + br i1 %cond, label %BB.nomerge, label %Succ Exit: ; preds = %Succ ret void @@ -235,21 +235,21 @@ ; This function can be merged define void @e() { entry: - br label %BB.tomerge + br label %Succ -BB.tomerge: ; preds = %Use, %entry +Succ: ; preds = %Use, %entry ; This phi is used somewhere else than Succ, but this should not prevent ; merging this block %a = phi i32 [ 1, %entry ], [ 0, %Use ] ; [#uses=1] - br label %Succ + br label %BB.tomerge -Succ: ; preds = %BB.tomerge +BB.tomerge: ; preds = %Succ %conde = call i1 @foo( ) ; [#uses=1] br i1 %conde, label %Use, label %Exit Use: ; preds = %Succ %cond = call i1 @bar( i32 %a ) ; [#uses=1] - br i1 %cond, label %BB.tomerge, label %Exit + br i1 %cond, label %Succ, label %Exit Exit: ; preds = %Use, %Succ ret void