diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -15,37 +15,65 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" +#include +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "select-optimize" +STATISTIC(NumSelectOptAnalyzed, + "Number of select groups considered for conversion to branch"); +STATISTIC(NumSelectConvertedExpColdOperand, + "Number of select groups converted due to expensive cold operand"); +STATISTIC(NumSelectConvertedHighPred, + "Number of select groups converted due to high-predictability"); +STATISTIC(NumSelectUnPred, + "Number of select groups not converted due to unpredictability"); STATISTIC(NumSelectsConverted, "Number of selects converted"); +static cl::opt ColdOperandThreshold( + "cold-operand-threshold", + cl::desc("Maximum frequency of path for an operand to be considered cold."), + cl::init(20), cl::Hidden); + namespace { class SelectOptimize : public FunctionPass { const TargetMachine *TM = nullptr; const TargetSubtargetInfo *TSI; const TargetLowering *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; const LoopInfo *LI; + DominatorTree *DT; 
std::unique_ptr BFI; std::unique_ptr BPI; + ProfileSummaryInfo *PSI; + OptimizationRemarkEmitter *ORE; public: static char ID; + SelectOptimize() : FunctionPass(ID) { initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); } @@ -53,8 +81,12 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: @@ -64,8 +96,17 @@ using SelectGroups = SmallVector; bool optimizeSelects(Function &F); + void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); + void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); void convertProfitableSIGroups(SelectGroups &ProfSIGroups); void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + void findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + bool isConvertToBranchProfitableBase(const SmallVector &ASI); + bool hasExpensiveColdOperand(const SmallVector &ASI); + void getOneUseSlice(Instruction *I, SmallVector &Slice); + bool isSelectHighlyPredictable(const SelectInst *SI); + bool isExpensiveInst(Instruction *I); bool isSelectKindSupported(SelectInst *SI); }; } // namespace @@ -75,7 +116,11 @@ INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) @@ -85,27 +130,41 @@ TM = &getAnalysis().getTM(); TSI = TM->getSubtargetImpl(F); TLI = TSI->getTargetLowering(); + + // If none of the select types is supported then skip this pass. + // This is an optimization pass. 
Legality issues will be handled by + // instruction selection. + if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && + !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && + !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) + return false; + + // If even a predictable select is cheap, then a branch cannot be cheaper. + if (!TLI->isPredictableSelectExpensive()) + return false; + + TTI = &getAnalysis().getTTI(F); + DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis().getPSI(); + ORE = &getAnalysis().getORE(); + + // When optimizing for size, selects are preferable over branches. + if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) + return false; return optimizeSelects(F); } bool SelectOptimize::optimizeSelects(Function &F) { - // Collect all the select groups. - SelectGroups SIGroups; - for (BasicBlock &BB : F) { - collectSelectGroups(BB, SIGroups); - } - // Determine for which select groups it is profitable converting to branches. SelectGroups ProfSIGroups; - // For now assume that all select groups can be profitably converted to - // branches. - for (SelectGroup &ASI : SIGroups) { - ProfSIGroups.push_back(ASI); - } + // Base heuristics apply only to non-loops and outer loops. + optimizeSelectsBase(F, ProfSIGroups); + // Separate heuristics for inner-most loops. + optimizeSelectsInnerLoops(F, ProfSIGroups); // Convert to branches the select groups that were deemed // profitable-to-convert. @@ -115,6 +174,25 @@ return !ProfSIGroups.empty(); } +void SelectOptimize::optimizeSelectsBase(Function &F, + SelectGroups &ProfSIGroups) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + // Base heuristics apply only to non-loops and outer loops. 
+ Loop *L = LI->getLoopFor(&BB); + if (L && L->isInnermost()) + continue; + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. + findProfitableSIGroupsBase(SIGroups, ProfSIGroups); +} + +void SelectOptimize::optimizeSelectsInnerLoops(Function &F, + SelectGroups &ProfSIGroups) {} + /// If \p isTrue is true, return the true value of \p SI, otherwise return /// false value of \p SI. If the true/false value of \p SI is defined by any /// select instructions in \p Selects, look through the defining select @@ -256,6 +334,141 @@ } } +void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : SIGroups) { + ++NumSelectOptAnalyzed; + if (isConvertToBranchProfitableBase(ASI)) + ProfSIGroups.push_back(ASI); + } +} + +bool SelectOptimize::isConvertToBranchProfitableBase( + const SmallVector &ASI) { + SelectInst *SI = ASI.front(); + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); + + // If unpredictable, branch form is less profitable. + if (SI->getMetadata(LLVMContext::MD_unpredictable)) { + ++NumSelectUnPred; + ORmiss << "Not converted to branch because of unpredictable branch. "; + ORE->emit(ORmiss); + return false; + } + + // If highly predictable, branch form is more profitable. + if (isSelectHighlyPredictable(SI)) { + ++NumSelectConvertedHighPred; + OR << "Converted to branch because of highly predictable branch. "; + ORE->emit(OR); + return true; + } + + // Look for expensive operands in the cold operand's (if any) dependence + // slice of any of the selects in the group.
+ if (hasExpensiveColdOperand(ASI)) { + ++NumSelectConvertedExpColdOperand; + OR << "Converted to branch because of expensive cold operand."; + ORE->emit(OR); + return true; + } + + ORmiss << "Not profitable to convert to branch (base heuristic)."; + ORE->emit(ORmiss); + return false; +} + +bool SelectOptimize::hasExpensiveColdOperand( + const SmallVector &ASI) { + bool ColdOperand = false; + uint64_t TrueWeight, FalseWeight; + if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Min = std::min(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + // Is there a path with frequency below ColdOperandThreshold% (default: 20%)? + ColdOperand = Sum * ColdOperandThreshold > 100 * Min; + } + if (!ColdOperand) + return false; + // Check if the cold path's dependence slice has an expensive operation for + // any of the selects of the group. + for (SelectInst *SI : ASI) { + Instruction *ColdI = nullptr; + if (TrueWeight < FalseWeight) + ColdI = dyn_cast(SI->getTrueValue()); + else + ColdI = dyn_cast(SI->getFalseValue()); + if (ColdI) { + SmallVector ColdSlice; + getOneUseSlice(ColdI, ColdSlice); + for (auto *ColdII : ColdSlice) + if (isExpensiveInst(ColdII)) + return true; + } + } + return false; +} + +void SelectOptimize::getOneUseSlice(Instruction *I, + SmallVector &Slice) { + std::queue Worklist; + Worklist.push(I); + SmallPtrSet Visited; + const Loop *L = LI->getLoopFor(I->getParent()); + while (!Worklist.empty()) { + Instruction *II = Worklist.front(); + Worklist.pop(); + + if (Visited.count(II)) + continue; + Visited.insert(II); + + if (!II->hasOneUse()) + continue; + + // Avoid considering instructions in outer or independent loops, or non-loop + // instructions (instructions with potentially much less frequency than the + // operand of the select). + const Loop *LII = LI->getLoopFor(II->getParent()); + if (L && L != LII && (!LII || !L->contains(LII))) + continue; + + // Avoid phi nodes that are not part of a loop-carried dependence. + // Could source from a cold basic block.
+ if (isa(II)) { + if (!LII) + continue; + if (LII->getHeader() != II->getParent()) + continue; + } + + Slice.push_back(II); + for (unsigned k = 0; k < II->getNumOperands(); ++k) + if (auto *OpI = dyn_cast(II->getOperand(k))) + Worklist.push(OpI); + } +} + +bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TTI->getPredictableBranchThreshold()) + return true; + } + } + return false; +} + +bool SelectOptimize::isExpensiveInst(Instruction *I) { + return TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency) >= + TargetTransformInfo::TCC_Expensive; +} + bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); if (VectorCond) diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -1,47 +1,133 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s -; Single select converted to branch -define i32 @single_select(i32 %a, i32 %b, i1 %cmp) { -; CHECK-LABEL: @single_select( +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test base heuristic 1: +;; highly-biased selects assumed to be highly predictable, converted to branches +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; If a select is obviously predictable, turn it into a branch. 
+define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select1( ; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; - %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + ret i32 %sel +} + +; If a select is obviously predictable (reversed profile weights), +; turn it into a branch. +define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select2( +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16 + ret i32 %sel +} + +; Not obviously predictable select. +define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @weighted_select3( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17 + ret i32 %sel +} + +; Unpredictable select should not form a branch.
+define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) { +; CHECK-LABEL: @unpred_select( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable !19 +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20 + ret i32 %sel +} + +; Predictable select in function with optsize attribute should not form branch. +define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize { +; CHECK-LABEL: @weighted_select_optsize( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 ret i32 %sel } -; Select group converted to branch -define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { -; CHECK-LABEL: @select_group( +define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 { +; CHECK-LABEL: @weighted_select_pgso( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + ret i32 %sel +} + +; If two selects in a row are predictable, turn them into branches. 
+define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { +; CHECK-LABEL: @weighted_selects( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0 +; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]] +; CHECK: select.false2: +; CHECK-NEXT: br label [[SELECT_END1]] +; CHECK: select.end1: +; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ] +; CHECK-NEXT: ret i32 [[SEL1]] +; + %cmp = icmp ne i32 %a, 0 + %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15 + %cmp1 = icmp ne i32 %sel, 0 + %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15 + ret i32 %sel1 +} + +; If select group predictable, turn it into a branch. 
+define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { +; CHECK-LABEL: @weighted_select_group( ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 - call void @llvm.dbg.value(metadata i32 %sel1, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !3) - %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0 + %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15 + call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23) + %sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !15 %add = add i32 %sel1, %sel2 ret i32 %add } -; Select group with intra-group dependence converted to branch +; Predictable select group with intra-group dependence converted to branch define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; CHECK-LABEL: @select_group_intra_group( ; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label 
[[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: @@ -50,22 +136,110 @@ ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[SUB]] ; - %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0 - %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !0 + %sel1 = select i1 %cmp, i32 %a, i32 %b,!prof !15 + %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15 %sub = sub i32 %sel1, %sel2 ret i32 %sub } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test base heuristic 2: +;; look for expensive instructions in the one-use slice of the cold path +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Select with cold one-use load value operand should form branch and +; sink load +define i32 @expensive_val_operand1(i32* nocapture %a, i32 %y, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand1( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17 + ret i32 %sel +} + +; Expensive hot value operand and cheap cold value operand. 
+define i32 @expensive_val_operand2(i32* nocapture %a, i32 %x, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand2( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17 + ret i32 %sel +} + +; Cold value operand with load in its one-use dependence slice should result +; in a branch with a sunk dependence slice. +define i32 @expensive_val_operand3(i32* nocapture %a, i32 %b, i32 %y, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand3( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 8 +; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF18]] +; CHECK: select.false: +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[TMP0:%.*]] ], [ [[Y:%.*]], [[SELECT_FALSE]] ] +; CHECK-NEXT: ret i32 [[SEL]] +; + %load = load i32, i32* %a, align 8 + %x = add i32 %load, %b + %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17 + ret i32 %sel +} + +; Multiple uses of the load value operand.
+define i32 @expensive_val_operand4(i32 %a, i32* nocapture %b, i32 %x, i1 %cmp) { +; CHECK-LABEL: @expensive_val_operand4( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[B:%.*]], align 4 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %load = load i32, i32* %b, align 4 + %sel = select i1 %cmp, i32 %x, i32 %load + %add = add i32 %sel, %load + ret i32 %add +} + ; Function Attrs: nounwind readnone speculatable willreturn declare void @llvm.dbg.value(metadata, metadata, metadata) -!llvm.module.flags = !{!6, !7} - -!0 = !{!"branch_weights", i32 1, i32 100} -!1 = !DIFile(filename: "test.c", directory: "/test") -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !5, splitDebugInlining: false, nameTableKind: None) -!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !2) -!4 = !DILocalVariable(name: "x", scope: !3) -!5 = !{} -!6 = !{i32 2, !"Dwarf Version", i32 4} -!7 = !{i32 1, !"Debug Info Version", i32 3} +!llvm.module.flags = !{!0, !26, !27} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} +!15 = !{!"branch_weights", i32 1, i32 100} +!16 = !{!"branch_weights", i32 100, i32 1} +!17 = !{!"branch_weights", i32 1, i32 99} +!18 = !{!"branch_weights", i32 50, i32 50} +!19 = !{!"function_entry_count", i64 100} +!20 = !{} +!21 = !DIFile(filename: "test.c", directory: 
"/test") +!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None) +!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22) +!24 = !DILocalVariable(name: "x", scope: !23) +!25 = !{} +!26 = !{i32 2, !"Dwarf Version", i32 4} +!27 = !{i32 1, !"Debug Info Version", i32 3}