Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -4541,6 +4541,42 @@ TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; } +/// Return true if SI2 depends on any select instruction between SI1 and SI2 +static bool isDependentSI(SelectInst *SI1, SelectInst *SI2) { + if (SI1 == SI2) + return false; + for (BasicBlock::iterator It = BasicBlock::iterator(SI1); + It !=SI1->getParent()->end(); ++It) { + SelectInst *SI = dyn_cast(&*It); + if (SI == SI2) + return false; + if (SI == SI2->getTrueValue() || SI == SI2->getFalseValue()) + return true; + } + assert(0 && "SI1 does not dominate SI2 in the same BB"); + return true; +} + +/// Find all consecutive select instructions that share the same condition with +/// SI, and store them in ASI. Return true if all select instruction does not +/// depend on each other. +static bool findAdjacentSelectInstructions(SelectInst *SI, + SmallVector &ASI) { + bool ret = true; + for (BasicBlock::iterator It = BasicBlock::iterator(SI); + It != SI->getParent()->end(); ++It) { + SelectInst *I = dyn_cast(&*It); + if (I && SI->getCondition() == I->getCondition()) { + ASI.push_back(I); + if (isDependentSI(SI, I)) + ret = false; + } else { + break; + } + } + return ret; +} + /// Returns true if a SelectInst should be turned into an explicit branch. static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, @@ -4586,6 +4622,16 @@ /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { + SmallVector ASI; + bool ShouldOptimize = findAdjacentSelectInstructions(SI, ASI); + + // Increment the current iterator to skip all the rest of select instructions + // because they will be either "not lowered" or "all lowered" to branch. + CurInstIterator = std::next(ASI.back()->getIterator()); + + if (!ShouldOptimize) + return false; + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -4632,7 +4678,7 @@ // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); - BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(ASI.back())); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); // Delete the unconditional branch that was just created by the split. @@ -4642,22 +4688,30 @@ // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr; BasicBlock *FalseBlock = nullptr; + BranchInst *TrueBranch = nullptr; + BranchInst *FalseBranch = nullptr; // Sink expensive instructions into the conditional blocks to avoid executing // them speculatively. - if (sinkSelectOperand(TTI, SI->getTrueValue())) { - TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", - EndBlock->getParent(), EndBlock); - auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); - auto *TrueInst = cast(SI->getTrueValue()); - TrueInst->moveBefore(TrueBranch); - } - if (sinkSelectOperand(TTI, SI->getFalseValue())) { - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - auto *FalseInst = cast(SI->getFalseValue()); - FalseInst->moveBefore(FalseBranch); + for (SelectInst *SI : ASI) { + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + if (TrueBlock == nullptr) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + } + auto *TrueInst = cast(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + if (FalseBlock == nullptr) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + } + auto *FalseInst = cast(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } } // If there was nothing to sink, then arbitrarily choose the 'false' side @@ -4686,18 +4740,21 @@ BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); } - // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); - PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), TrueBlock); - PN->addIncoming(SI->getFalseValue(), FalseBlock); - - SI->replaceAllUsesWith(PN); - SI->eraseFromParent(); + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + ++NumSelectsExpanded; + } // Instruct OptimizeBlock to skip to the next block. CurInstIterator = StartBlock->end(); - ++NumSelectsExpanded; return true; } Index: test/CodeGen/X86/codegen-prepare-select.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/codegen-prepare-select.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-NOT: cmov +; CHECK: %select.false +; CHECK-NOT: %select.false2 + +; Function Attrs: norecurse nounwind readonly uwtable +define i32* @_Z8rbsearchiPiii(i32, i32* readonly, i32, i32) local_unnamed_addr #0 { + %5 = icmp sgt i32 %2, 1 + br i1 %5, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: + br label %.lr.ph + +.lr.ph: + %.016 = phi i32* [ %.1, %.lr.ph ], [ %1, %.lr.ph.preheader ] + %.01415 = phi i32 [ %.sink, %.lr.ph ], [ %2, %.lr.ph.preheader ] + %6 = ashr i32 %.01415, 1 + %7 = mul nsw i32 %6, %3 + %8 = sext i32 %7 to i64 + %9 = getelementptr inbounds i32, i32* %.016, i64 %8 + %10 = load i32, i32* %9, align 4, !tbaa !1 + %11 = icmp sgt i32 %10, %0 + %12 = sub nsw i32 %.01415, %6 + %.1 = select i1 %11, i32* %.016, i32* %9, !prof !5 + %.sink = select i1 %11, i32 %6, i32 %12, !prof !5 + %13 = icmp sgt i32 %.sink, 1 + br i1 %13, label %.lr.ph, label %._crit_edge.loopexit + +._crit_edge.loopexit: + br label %._crit_edge + +._crit_edge: + %.0.lcssa = phi i32* [ %1, %4 ], [ %.1, %._crit_edge.loopexit ] + ret i32* %.0.lcssa +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C++ TBAA"} +!5 = !{!"branch_weights", i32 1, i32 2000}