Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp @@ -3846,8 +3846,20 @@ return MadeChange; } +/// Check if V (an operand of a select instruction) is an expensive instruction +/// that is only used once. +static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { + auto *I = dyn_cast(V); + if (I && I->hasOneUse() && + TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive) + return true; + + return false; +} + /// Returns true if a SelectInst should be turned into an explicit branch. -static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the @@ -3868,8 +3880,17 @@ // on a load from memory. But if the load is used more than once, do not // change the select to a branch because the load is probably needed // regardless of whether the branch is taken or not. - return ((isa(CmpOp0) && CmpOp0->hasOneUse()) || - (isa(CmpOp1) && CmpOp1->hasOneUse())); + if ((isa(CmpOp0) && CmpOp0->hasOneUse()) || + (isa(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive and only needed on one side + // of the select, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; } @@ -3895,34 +3916,97 @@ // We have efficient codegen support for the select instruction. // Check if it is profitable to keep this 'select'. if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(SI)) + !isFormingBranchFromSelectProfitable(TTI, SI)) return false; } ModifiedDT = true; + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); - BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - // Create a new block serving as the landing pad for the branch. - BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", - NextBlock->getParent(), NextBlock); - - // Move the unconditional branch from the block with the select in it into our - // landing pad block. + // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(NextBlock, SmallBlock); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + auto *TrueInst = cast(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + auto *FalseInst = cast(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } // Insert the real conditional branch based on the original condition. - BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + if (TrueBlock == nullptr) { + BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + FalseBlock = StartBlock; + } else { + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + } // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", &NextBlock->front()); + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), StartBlock); - PN->addIncoming(SI->getFalseValue(), SmallBlock); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + SI->replaceAllUsesWith(PN); SI->eraseFromParent(); Index: llvm/trunk/test/Transforms/CodeGenPrepare/select.ll =================================================================== --- llvm/trunk/test/Transforms/CodeGenPrepare/select.ll +++ llvm/trunk/test/Transforms/CodeGenPrepare/select.ll @@ -0,0 +1,114 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +; Nothing to sink here, but this gets converted to a branch to +; avoid stalling an out-of-order CPU on a predictable branch. + +define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) { +entry: + %load = load double, double* %b, align 8 + %cmp = fcmp olt double %load, %a + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel + +; CHECK-LABEL: @no_sink( +; CHECK: %load = load double, double* %b, align 8 +; CHECK: %cmp = fcmp olt double %load, %a +; CHECK: br i1 %cmp, label %select.end, label %select.false +; CHECK: select.false: +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ] +; CHECK: ret i32 %sel +} + + +; An 'fdiv' is expensive, so sink it rather than speculatively execute it. + +define float @fdiv_true_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %div, float 2.0 + ret float %sel + +; CHECK-LABEL: @fdiv_true_sink( +; CHECK: %cmp = fcmp ogt float %a, 1.0 +; CHECK: br i1 %cmp, label %select.true.sink, label %select.end +; CHECK: select.true.sink: +; CHECK: %div = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ] +; CHECK: ret float %sel +} + +define float @fdiv_false_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 3.0 + %sel = select i1 %cmp, float 4.0, float %div + ret float %sel + +; CHECK-LABEL: @fdiv_false_sink( +; CHECK: %cmp = fcmp ogt float %a, 3.0 +; CHECK: br i1 %cmp, label %select.end, label %select.false.sink +; CHECK: select.false.sink: +; CHECK: %div = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ] +; CHECK: ret float %sel +} + +define float @fdiv_both_sink(float %a, float %b) { +entry: + %div1 = fdiv float %a, %b + %div2 = fdiv float %b, %a + %cmp = fcmp ogt float %a, 5.0 + %sel = select i1 %cmp, float %div1, float %div2 + ret float %sel + +; CHECK-LABEL: @fdiv_both_sink( +; CHECK: %cmp = fcmp ogt float %a, 5.0 +; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink +; CHECK: select.true.sink: +; CHECK: %div1 = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.false.sink: +; CHECK: %div2 = fdiv float %b, %a +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ] +; CHECK: ret float %sel +} + +; An 'fadd' is not too expensive, so it's ok to speculate. + +define float @fadd_no_sink(float %a, float %b) { + %add = fadd float %a, %b + %cmp = fcmp ogt float 6.0, %a + %sel = select i1 %cmp, float %add, float 7.0 + ret float %sel + +; CHECK-LABEL: @fadd_no_sink( +; CHECK: %sel = select i1 %cmp, float %add, float 7.0 +} + +; Possible enhancement: sinkability is only calculated with the direct +; operand of the select, so we don't try to sink this. The fdiv cost is not +; taken into account. + +define float @fdiv_no_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %add = fadd float %div, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %add, float 8.0 + ret float %sel + +; CHECK-LABEL: @fdiv_no_sink( +; CHECK: %sel = select i1 %cmp, float %add, float 8.0 +} +