diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2407,7 +2407,8 @@ /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - const DataLayout &DL) { + const DataLayout &DL, + SmallPtrSetImpl *LoopHeaders) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -2515,6 +2516,36 @@ return Changed; } } + + // If we know about loop headers, try to avoid conditionally hoisting too much + // into in loops. + if (LoopHeaders && LoopHeaders->size() <= 10 && + BudgetRemaining <= int(TwoEntryPHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic) / + 2) { + BasicBlock *Pred = DomBlock; + while (Pred) { + if (LoopHeaders->contains(Pred)) + return false; + auto *NewPred = Pred->getSinglePredecessor(); + if (!NewPred) { + // Unfortunately no DT is available here. Try to peak through some + // common patterns. + SmallVector Predecessors(pred_begin(Pred), + pred_end(Pred)); + if (Predecessors.size() != 2) + break; + if (Predecessors[0]->getSinglePredecessor() != + Predecessors[1]->getSinglePredecessor()) + break; + NewPred = Predecessors[0]->getSinglePredecessor(); + } + Pred = NewPred; + if (Pred == DomBlock) + break; + } + } + assert(DomBlock && "Failed to find root DomBlock"); LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond @@ -6275,7 +6306,7 @@ // eliminate it, do so now. if (auto *PN = dyn_cast(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + Changed |= FoldTwoEntryPHINode(PN, TTI, DL, LoopHeaders); } Instruction *Terminator = BB->getTerminator(); diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll --- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll @@ -411,12 +411,16 @@ ; ALL-NEXT: entry: ; ALL-NEXT: br label [[LOOP_HEADER:%.*]] ; ALL: loop.header: -; ALL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_HEADER]] ] +; ALL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[COND_END:%.*]] ] ; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; ALL-NEXT: br i1 [[TOBOOL]], label [[COND_END]], label [[COND_TRUE:%.*]] +; ALL: cond.true: ; ALL-NEXT: [[XOR:%.*]] = xor i32 [[X]], -1 ; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[XOR]], i1 true) ; ALL-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] +; ALL-NEXT: br label [[COND_END]] +; ALL: cond.end: +; ALL-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[LOOP_HEADER]] ] ; ALL-NEXT: store i16 [[COND]], i16* [[PTR:%.*]], align 2 ; ALL-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; ALL-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 100