Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -3840,8 +3840,21 @@ return MadeChange; } +/// If V (an operand of a select instruction) is an expensive instruction that +/// is only used once, return it. Otherwise, return a nullptr. +static Instruction *sinkSelectOperand(const TargetTransformInfo *TTI, + Value *V) { + auto *I = dyn_cast(V); + if (I && I->hasOneUse() && + TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive) + return I; + + return nullptr; +} + /// Returns true if a SelectInst should be turned into an explicit branch. -static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the @@ -3862,8 +3875,16 @@ // on a load from memory. But if the load is used more than once, do not // change the select to a branch because the load is probably needed // regardless of whether the branch is taken or not. - return ((isa(CmpOp0) && CmpOp0->hasOneUse()) || - (isa(CmpOp1) && CmpOp1->hasOneUse())); + if ((isa(CmpOp0) && CmpOp0->hasOneUse()) || + (isa(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; } @@ -3889,34 +3910,79 @@ // We have efficient codegen support for the select instruction. // Check if it is profitable to keep this 'select'. if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(SI)) + !isFormingBranchFromSelectProfitable(TTI, SI)) return false; } ModifiedDT = true; + // Transform a sequence like this: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.end, label %select.false + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %entry ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the landing pad(s) for the new branch. + // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); - BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - - // Create a new block serving as the landing pad for the branch. - BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", - NextBlock->getParent(), NextBlock); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - // Move the unconditional branch from the block with the select in it into our - // landing pad block. + // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(NextBlock, SmallBlock); + + // The new basic blocks for the conditional branch are initialized to the end + // block, but at least one will become an actual new landing pad. + BasicBlock *TrueBlock = EndBlock; + BasicBlock *FalseBlock = EndBlock; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (Instruction *SinkTrue = sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + BranchInst *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + SinkTrue->moveBefore(TrueBranch); + } + if (Instruction *SinkFalse = sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + BranchInst *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + SinkFalse->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new landing pad input for the PHI. + if (TrueBlock == FalseBlock) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } // Insert the real conditional branch based on the original condition. - BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that path actually originates from the start + // block from the point of view of the new PHI. + if (TrueBlock == EndBlock) + TrueBlock = StartBlock; + else if (FalseBlock == EndBlock) + FalseBlock = StartBlock; // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PHINode *PN = PHINode::Create(SI->getType(), 2, "", EndBlock->begin()); PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), StartBlock); - PN->addIncoming(SI->getFalseValue(), SmallBlock); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); SI->replaceAllUsesWith(PN); SI->eraseFromParent(); Index: test/Transforms/CodeGenPrepare/select.ll =================================================================== --- test/Transforms/CodeGenPrepare/select.ll +++ test/Transforms/CodeGenPrepare/select.ll @@ -0,0 +1,114 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +; Nothing to sink here, but this gets converted to a branch to +; avoid stalling an out-of-order CPU on a predictable branch. + +define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) { +entry: + %load = load double, double* %b, align 8 + %cmp = fcmp olt double %load, %a + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel + +; CHECK-LABEL: @no_sink( +; CHECK: %load = load double, double* %b, align 8 +; CHECK: %cmp = fcmp olt double %load, %a +; CHECK: br i1 %cmp, label %select.end, label %select.false +; CHECK: select.false: +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ] +; CHECK: ret i32 %sel +} + + +; An 'fdiv' is expensive, so sink it rather than speculatively execute it. + +define float @fdiv_true_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %div, float 2.0 + ret float %sel + +; CHECK-LABEL: @fdiv_true_sink( +; CHECK: %cmp = fcmp ogt float %a, 1.0 +; CHECK: br i1 %cmp, label %select.true.sink, label %select.end +; CHECK: select.true.sink: +; CHECK: %div = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ] +; CHECK: ret float %sel +} + +define float @fdiv_false_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 3.0 + %sel = select i1 %cmp, float 4.0, float %div + ret float %sel + +; CHECK-LABEL: @fdiv_false_sink( +; CHECK: %cmp = fcmp ogt float %a, 3.0 +; CHECK: br i1 %cmp, label %select.end, label %select.false.sink +; CHECK: select.false.sink: +; CHECK: %div = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ] +; CHECK: ret float %sel +} + +define float @fdiv_both_sink(float %a, float %b) { +entry: + %div1 = fdiv float %a, %b + %div2 = fdiv float %b, %a + %cmp = fcmp ogt float %a, 5.0 + %sel = select i1 %cmp, float %div1, float %div2 + ret float %sel + +; CHECK-LABEL: @fdiv_both_sink( +; CHECK: %cmp = fcmp ogt float %a, 5.0 +; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink +; CHECK: select.true.sink: +; CHECK: %div1 = fdiv float %a, %b +; CHECK: br label %select.end +; CHECK: select.false.sink: +; CHECK: %div2 = fdiv float %b, %a +; CHECK: br label %select.end +; CHECK: select.end: +; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ] +; CHECK: ret float %sel +} + +; An 'fadd' is not too expensive, so it's ok to speculate. + +define float @fadd_no_sink(float %a, float %b) { + %add = fadd float %a, %b + %cmp = fcmp ogt float 6.0, %a + %sel = select i1 %cmp, float %add, float 7.0 + ret float %sel + +; CHECK-LABEL: @fadd_no_sink( +; CHECK: %sel = select i1 %cmp, float %add, float 7.0 +} + +; Possible future enhancement: sinkability is only calculated with the direct +; operand of the select, so we don't try to sink this. The fdiv cost is not +; taken into account. + +define float @fdiv_no_sink(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %add = fadd float %div, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %add, float 8.0 + ret float %sel + +; CHECK-LABEL: @fdiv_no_sink( +; CHECK: %sel = select i1 %cmp, float %add, float 8.0 +} +