diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,16 +10,40 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "select-optimize" + +STATISTIC(NumSelectsConverted, "Number of selects converted"); + namespace { class SelectOptimize : public FunctionPass { + const TargetMachine *TM = nullptr; + const TargetSubtargetInfo *TSI; + const TargetLowering *TLI = nullptr; + const LoopInfo *LI; + std::unique_ptr BFI; + std::unique_ptr BPI; + public: static char ID; SelectOptimize() : FunctionPass(ID) { @@ -28,16 +52,202 @@ bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + +private: + // Select groups consist of consecutive select instructions with the same + // condition. + using SelectGroup = SmallVector; + using SelectGroups = SmallVector; + + bool optimizeSelects(Function &F); + void convertProfitableSIGroups(SelectGroups &ProfSIGroups); + void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + bool isSelectKindSupported(SelectInst *SI); }; } // namespace char SelectOptimize::ID = 0; -INITIALIZE_PASS(SelectOptimize, "select-optimize", "Optimize selects", false, - false) + +INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } bool SelectOptimize::runOnFunction(Function &F) { - llvm_unreachable("Unimplemented"); + TM = &getAnalysis().getTM(); + TSI = TM->getSubtargetImpl(F); + TLI = TSI->getTargetLowering(); + LI = &getAnalysis().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + + return optimizeSelects(F); +} + +bool SelectOptimize::optimizeSelects(Function &F) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. + SelectGroups ProfSIGroups; + // For now assume that all select groups can be profitably converted to + // branches. + for (SelectGroup &ASI : SIGroups) { + ProfSIGroups.push_back(ASI); + } + + // Convert to branches the select groups that were deemed + // profitable-to-convert. + convertProfitableSIGroups(ProfSIGroups); + + // Code modified if at least one select group was converted. + return !ProfSIGroups.empty(); +} + +/// If \p isTrue is true, return the true value of \p SI, otherwise return +/// false value of \p SI. If the true/false value of \p SI is defined by any +/// select instructions in \p Selects, look through the defining select +/// instruction until the true/false value is not defined in \p Selects. +static Value * +getTrueOrFalseValue(SelectInst *SI, bool isTrue, + const SmallPtrSet &Selects) { + Value *V = nullptr; + for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); + DefSI = dyn_cast(V)) { + assert(DefSI->getCondition() == SI->getCondition() && + "The condition of DefSI does not match with SI"); + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + } + assert(V && "Failed to get select true/false value"); + return V; +} + +void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : ProfSIGroups) { + // TODO: eliminate the redundancy of logic transforming selects to branches + // by removing CodeGenPrepare::optimizeSelectInst and optimizing here + // selects for all cases (with and without profile information). + + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // %cmp.frozen = freeze %cmp + // br i1 %cmp.frozen, label %select.end, label %select.false + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %start ], [ %d, %select.false ] + // + // %cmp should be frozen, otherwise it may introduce undefined behavior. + + // We split the block containing the select(s) into two blocks. + SelectInst *SI = ASI.front(); + SelectInst *LastSI = ASI.back(); + BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); + // Delete the unconditional branch that was just created by the split. + StartBlock->getTerminator()->eraseFromParent(); + + // These are the new basic blocks for the conditional branch. + // For now, no instruction sinking to the true/false blocks. + // Thus both True and False blocks will be empty. + BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; + + // Use the 'false' side for a new input value to the PHI. + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + + // For the 'true' side the path originates from the start block from the + // point view of the new PHI. + TrueBlock = StartBlock; + + // Insert the real conditional branch based on the original condition. + BasicBlock *TT, *FT; + TT = EndBlock; + FT = FalseBlock; + IRBuilder<> IB(SI); + auto *CondFr = + IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); + IB.CreateCondBr(CondFr, TT, FT, SI); + + SmallPtrSet INS; + INS.insert(ASI.begin(), ASI.end()); + // Use reverse iterator because later select may use the value of the + // earlier select, and we need to propagate value through earlier select + // to get the PHI operand. + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); + PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + PN->setDebugLoc(SI->getDebugLoc()); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + INS.erase(SI); + ++NumSelectsConverted; + } + } +} + +void SelectOptimize::collectSelectGroups(BasicBlock &BB, + SelectGroups &SIGroups) { + BasicBlock::iterator BBIt = BB.begin(); + while (BBIt != BB.end()) { + Instruction *I = &*BBIt++; + if (SelectInst *SI = dyn_cast(I)) { + SelectGroup SIGroup; + SIGroup.push_back(SI); + while (BBIt != BB.end()) { + SelectInst *NI = dyn_cast(&*BBIt); + if (NI && SI->getCondition() == NI->getCondition()) { + SIGroup.push_back(NI); + ++BBIt; + } else { + break; + } + } + + // If the select type is not supported, no point optimizing it. + // Instruction selection will take care of it. + if (!isSelectKindSupported(SI)) + continue; + + SIGroups.push_back(SIGroup); + } + } +} + +bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + if (VectorCond) + return false; + TargetLowering::SelectSupportKind SelectKind; + if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + return TLI->isSelectSupported(SelectKind); } diff --git a/llvm/test/CodeGen/X86/select-optimize-opt.ll b/llvm/test/CodeGen/X86/select-optimize-opt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/select-optimize-opt.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s + +;; Opt-level tests to ensure that profile metadata are preserved across +;; select-to-branch transformations. + +; If a select is obviously predictable, turn it into a branch. +define i32 @weighted_select(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 + ret i32 %sel +} + +; If two selects in a row are predictable, turn them into branches. +define i32 @weighted_selects(i32 %a, i32 %b) { +; CHECK-LABEL: @weighted_selects( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF0]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0 +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: select.false2: +; CHECK-NEXT: br label [[SELECT_END1]] +; CHECK: select.end1: +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ] +; CHECK-NEXT: ret i32 [[SEL1]] +; + %cmp = icmp ne i32 %a, 0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 + %cmp1 = icmp ne i32 %sel, 0 + %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !1 + ret i32 %sel1 +} + +; If select group predictable, turn it into a branch. +define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { +; CHECK-LABEL: @weighted_select_group( +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF0]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 + %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0 + %add = add i32 %sel1, %sel2 + ret i32 %add +} + +!0 = !{!"branch_weights", i32 1, i32 100} +!1 = !{!"branch_weights", i32 100, i32 1} diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -disable-select-optimize=false -x86-cmov-converter=false -disable-cgp-select2branch=true < %s | FileCheck %s + +; Single select converted to branch +define i32 @single_select(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: single_select: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %select.false +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: .LBB0_2: # %select.end +; CHECK-NEXT: retq + %sel = select i1 %cmp, i32 %a, i32 %b + ret i32 %sel +} + +; Select group converted to branch +define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { +; CHECK-LABEL: select_group: +; CHECK: # %bb.0: +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %select.false +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: retq + %sel1 = select i1 %cmp, i32 %a, i32 %b + %sel2 = select i1 %cmp, i32 %c, i32 %a + %add = add i32 %sel1, %sel2 + ret i32 %add +} + +; Select group with intra-group dependence converted to branch +define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { +; CHECK-LABEL: select_group_intra_group: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB2_2 +; CHECK-NEXT: # %bb.1: # %select.false +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: .LBB2_2: # %select.end +; CHECK-NEXT: subl %edx, %eax +; CHECK-NEXT: retq + %sel1 = select i1 %cmp, i32 %a, i32 %b + %sel2 = select i1 %cmp, i32 %c, i32 %sel1 + %sub = sub i32 %sel1, %sel2 + ret i32 %sub +}