Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -125,7 +125,7 @@ void propagateWeights(Function &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); - bool propagateThroughEdges(Function &F); + bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); unsigned getOffset(unsigned L, unsigned H) const; void clearFunctionData(); @@ -460,11 +460,17 @@ if (!FS) return std::error_code(); - // Ignore all dbg_value intrinsics. - const IntrinsicInst *II = dyn_cast(&Inst); - if (II && II->getIntrinsicID() == Intrinsic::dbg_value) + // Ignore all intrinsics and branch instructions. + if (isa(Inst) || isa(Inst)) return std::error_code(); + // If a call instruction is inlined in profile, but not inlined here, + // it means that the inlined callsite has no sample, thus the call + // instruction should have 0 count. + const CallInst *CI = dyn_cast(&Inst); + if (CI && findCalleeFunctionSamples(*CI)) + return 0; + const DILocation *DIL = DLoc; unsigned Lineno = DLoc.getLine(); unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); @@ -488,13 +494,6 @@ << Inst << " (line offset: " << Lineno - HeaderLineno << "." << DIL->getDiscriminator() << " - weight: " << R.get() << ")\n"); - } else { - // If a call instruction is inlined in profile, but not inlined here, - // it means that the inlined callsite has no sample, thus the call - // instruction should have 0 count. - const CallInst *CI = dyn_cast(&Inst); - if (CI && findCalleeFunctionSamples(*CI)) - R = 0; } return R; } @@ -695,6 +694,9 @@ bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2); if (BB1 != BB2 && IsDomParent && IsInSameLoop) { EquivalenceClass[BB2] = EC; + if (VisitedBlocks.count(BB2)) { + VisitedBlocks.insert(EC); + } // If BB2 is heavier than BB1, make BB2 have the same weight // as BB1. @@ -707,7 +709,11 @@ Weight = std::max(Weight, BlockWeights[BB2]); } } - BlockWeights[EC] = Weight; + if (EC == &EC->getParent()->getEntryBlock()) { + BlockWeights[EC] = Samples->getHeadSamples() + 1; + } else { + BlockWeights[EC] = Weight; + } } /// \brief Find equivalence classes. @@ -798,9 +804,12 @@ /// count of the basic block, if needed. /// /// \param F Function to process. +/// \param UpdateBlockCount Whether we could update basic block counts that has +/// already been annotated. /// /// \returns True if new weights were assigned to edges or blocks. -bool SampleProfileLoader::propagateThroughEdges(Function &F) { +bool SampleProfileLoader::propagateThroughEdges(Function &F, + bool UpdateBlockCount) { bool Changed = false; DEBUG(dbgs() << "\nPropagation through edges\n"); for (const auto &BI : F) { @@ -892,11 +901,32 @@ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight; else EdgeWeights[UnknownEdge] = 0; + const BasicBlock *OTHER_EC; + if (i == 0) + OTHER_EC = EquivalenceClass[UnknownEdge.first]; + else + OTHER_EC = EquivalenceClass[UnknownEdge.second]; + if (VisitedBlocks.count(OTHER_EC) && EdgeWeights[UnknownEdge] > BlockWeights[OTHER_EC]) + EdgeWeights[UnknownEdge] = BlockWeights[OTHER_EC]; VisitedEdges.insert(UnknownEdge); Changed = true; DEBUG(dbgs() << "Set weight for edge: "; printEdgeWeight(dbgs(), UnknownEdge)); } + } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) { + if (i == 0) { + for (auto *Pred : Predecessors[BB]) { + Edge E = std::make_pair(Pred, BB); + EdgeWeights[E] = 0; + VisitedEdges.insert(E); + } + } else { + for (auto *Succ : Successors[BB]) { + Edge E = std::make_pair(BB, Succ); + EdgeWeights[E] = 0; + VisitedEdges.insert(E); + } + } } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) { uint64_t &BBWeight = BlockWeights[BB]; // We have a self-referential edge and the weight of BB is known. @@ -909,6 +939,11 @@ DEBUG(dbgs() << "Set self-referential edge weight to: "; printEdgeWeight(dbgs(), SelfReferentialEdge)); } + if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) { + BlockWeights[EC] = TotalWeight; + VisitedBlocks.insert(EC); + Changed = true; + } } } @@ -968,7 +1003,19 @@ // Add an entry count to the function using the samples gathered // at the function entry. - F.setEntryCount(Samples->getHeadSamples()); + F.setEntryCount(Samples->getHeadSamples() + 1); + + for (auto &BI : F) { + BasicBlock *BB = &BI; + Loop *L = LI->getLoopFor(BB); + if (!L) { + continue; + } + BasicBlock *Header = L->getHeader(); + if (Header && BlockWeights[BB] > BlockWeights[Header]) { + BlockWeights[Header] = BlockWeights[BB]; + } + } // Before propagation starts, build, for each block, a list of // unique predecessors and successors. This is necessary to handle @@ -979,7 +1026,18 @@ // Propagate until we converge or we go past the iteration limit. while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F); + Changed = propagateThroughEdges(F, false); + } + + VisitedEdges.clear(); + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } + + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, true); } // Generate MD_prof metadata for every branch instruction using the @@ -1025,7 +1083,7 @@ DEBUG(dbgs() << " (saturated due to uint32_t overflow)"); Weight = std::numeric_limits::max(); } - Weights.push_back(static_cast(Weight)); + Weights.push_back(static_cast(Weight + 1)); if (Weight != 0) { if (Weight > MaxWeight) { MaxWeight = Weight; @@ -1036,21 +1094,17 @@ // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. - if (MaxWeight > 0) { - DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); - TI->setMetadata(llvm::LLVMContext::MD_prof, - MDB.createBranchWeights(Weights)); - DebugLoc BranchLoc = TI->getDebugLoc(); - emitOptimizationRemark( - Ctx, DEBUG_TYPE, F, MaxDestLoc, - Twine("most popular destination for conditional branches at ") + - ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" + - Twine(BranchLoc.getLine()) + ":" + - Twine(BranchLoc.getCol())) - : Twine(""))); - } else { - DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); - } + DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); + TI->setMetadata(llvm::LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + DebugLoc BranchLoc = TI->getDebugLoc(); + emitOptimizationRemark( + Ctx, DEBUG_TYPE, F, MaxDestLoc, + Twine("most popular destination for conditional branches at ") + + ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" + + Twine(BranchLoc.getLine()) + ":" + + Twine(BranchLoc.getCol())) + : Twine(""))); } } Index: test/Transforms/SampleProfile/Inputs/branch.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/branch.prof +++ test/Transforms/SampleProfile/Inputs/branch.prof @@ -1,4 +1,4 @@ -main:15680:0 +main:15680:2500 1: 2500 4: 1000 5: 1000 Index: test/Transforms/SampleProfile/Inputs/fnptr.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/fnptr.prof +++ test/Transforms/SampleProfile/Inputs/fnptr.prof @@ -3,6 +3,7 @@ _Z3bari:20301:1437 1: 1437 main:184019:0 + 3: 0 4: 534 6: 2080 9: 2064 _Z3bari:1471 _Z3fooi:631 Index: test/Transforms/SampleProfile/branch.ll =================================================================== --- test/Transforms/SampleProfile/branch.ll +++ test/Transforms/SampleProfile/branch.ll @@ -49,8 +49,8 @@ %0 = load i32, i32* %argc.addr, align 4, !dbg !21 %cmp = icmp slt i32 %0, 2, !dbg !23 br i1 %cmp, label %if.then, label %if.end, !dbg !24 -; CHECK: edge entry -> if.then probability is 0x4ccccccd / 0x80000000 = 60.00% -; CHECK: edge entry -> if.end probability is 0x33333333 / 0x80000000 = 40.00% +; CHECK: edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01% +; CHECK: edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99% if.then: ; preds = %entry store i32 1, i32* %retval, align 4, !dbg !25 @@ -67,8 +67,8 @@ %3 = load i32, i32* %limit, align 4, !dbg !32 %cmp1 = icmp sgt i32 %3, 100, !dbg !34 br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35 -; CHECK: edge if.end -> if.then.2 probability is 0x66666666 / 0x80000000 = 80.00% -; CHECK: edge if.end -> if.else probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94% +; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06% if.then.2: ; preds = %if.end call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38 Index: test/Transforms/SampleProfile/calls.ll =================================================================== --- test/Transforms/SampleProfile/calls.ll +++ test/Transforms/SampleProfile/calls.ll @@ -48,8 +48,8 @@ store i32 %inc, i32* %i, align 4, !dbg !14 %cmp = icmp slt i32 %0, 400000000, !dbg !14 br i1 %cmp, label %while.body, label %while.end, !dbg !14 -; CHECK: edge while.cond -> while.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge while.cond -> while.end probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge while.cond -> while.body probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 0x0005b1e0 / 0x80000000 = 0.02% while.body: ; preds = %while.cond %1 = load i32, i32* %i, align 4, !dbg !16 @@ -59,8 +59,8 @@ ; both branches out of while.body had the same weight. In reality, ; the edge while.body->if.then is taken most of the time. ; -; CHECK: edge while.body -> if.else probability is 0x00000000 / 0x80000000 = 0.00% -; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge while.body -> if.else probability is 0x0005b1e0 / 0x80000000 = 0.02% +; CHECK: edge while.body -> if.then probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge] if.then: ; preds = %while.body Index: test/Transforms/SampleProfile/discriminator.ll =================================================================== --- test/Transforms/SampleProfile/discriminator.ll +++ test/Transforms/SampleProfile/discriminator.ll @@ -35,15 +35,15 @@ %0 = load i32, i32* %i.addr, align 4, !dbg !12 %cmp = icmp slt i32 %0, 100, !dbg !12 br i1 %cmp, label %while.body, label %while.end, !dbg !12 -; CHECK: edge while.cond -> while.body probability is 0x7ebb907a / 0x80000000 = 99.01% [HOT edge] -; CHECK: edge while.cond -> while.end probability is 0x01446f86 / 0x80000000 = 0.99% +; CHECK: edge while.cond -> while.body probability is 0x7d83ba68 / 0x80000000 = 98.06% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 0x027c4598 / 0x80000000 = 1.94% while.body: ; preds = %while.cond %1 = load i32, i32* %i.addr, align 4, !dbg !14 %cmp1 = icmp slt i32 %1, 50, !dbg !14 br i1 %cmp1, label %if.then, label %if.end, !dbg !14 -; CHECK: edge while.body -> if.then probability is 0x06666666 / 0x80000000 = 5.00% -; CHECK: edge while.body -> if.end probability is 0x7999999a / 0x80000000 = 95.00% [HOT edge] +; CHECK: edge while.body -> if.then probability is 0x07878788 / 0x80000000 = 5.88% +; CHECK: edge while.body -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] if.then: ; preds = %while.body %2 = load i32, i32* %x, align 4, !dbg !17 Index: test/Transforms/SampleProfile/entry_counts.ll =================================================================== --- test/Transforms/SampleProfile/entry_counts.ll +++ test/Transforms/SampleProfile/entry_counts.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s ; According to the profile, function empty() was called 13,293 times. -; CHECK: {{.*}} = !{!"function_entry_count", i64 13293} +; CHECK: {{.*}} = !{!"function_entry_count", i64 13294} define void @empty() !dbg !4 { entry: Index: test/Transforms/SampleProfile/fnptr.ll =================================================================== --- test/Transforms/SampleProfile/fnptr.ll +++ test/Transforms/SampleProfile/fnptr.ll @@ -8,12 +8,12 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s -; CHECK: edge for.body3 -> if.then probability is 0x1a4f3959 / 0x80000000 = 20.55% -; CHECK: edge for.body3 -> if.else probability is 0x65b0c6a7 / 0x80000000 = 79.45% -; CHECK: edge for.inc -> for.inc12 probability is 0x20dc8dc9 / 0x80000000 = 25.67% -; CHECK: edge for.inc -> for.body3 probability is 0x5f237237 / 0x80000000 = 74.33% -; CHECK: edge for.inc12 -> for.end14 probability is 0x00000000 / 0x80000000 = 0.00% -; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00% +; CHECK: edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58% +; CHECK: edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42% +; CHECK: edge for.inc -> for.inc12 probability is 0x000fdc50 / 0x80000000 = 0.05% +; CHECK: edge for.inc -> for.body3 probability is 0x7ff023b0 / 0x80000000 = 99.95% +; CHECK: edge for.inc12 -> for.end14 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x40000000 / 0x80000000 = 50.00% ; Original C++ test case. ; Index: test/Transforms/SampleProfile/offset.ll =================================================================== --- test/Transforms/SampleProfile/offset.ll +++ test/Transforms/SampleProfile/offset.ll @@ -29,8 +29,8 @@ %0 = load i32, i32* %a.addr, align 4, !dbg !14 %cmp = icmp sgt i32 %0, 0, !dbg !18 br i1 %cmp, label %if.then, label %if.else, !dbg !19 -; CHECK: edge entry -> if.then probability is 0x0147ae14 / 0x80000000 = 1.00% -; CHECK: edge entry -> if.else probability is 0x7eb851ec / 0x80000000 = 99.00% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x0167ba82 / 0x80000000 = 1.10% +; CHECK: edge entry -> if.else probability is 0x7e98457e / 0x80000000 = 98.90% [HOT edge] if.then: ; preds = %entry store i32 10, i32* %retval, align 4, !dbg !20 Index: test/Transforms/SampleProfile/propagate.ll =================================================================== --- test/Transforms/SampleProfile/propagate.ll +++ test/Transforms/SampleProfile/propagate.ll @@ -85,8 +85,8 @@ %div = sdiv i64 %7, 3, !dbg !43 %cmp2 = icmp sgt i64 %6, %div, !dbg !44 br i1 %cmp2, label %if.then3, label %if.end, !dbg !45 -; CHECK: edge for.body -> if.then3 probability is 0x51451451 / 0x80000000 = 63.49% -; CHECK: edge for.body -> if.end probability is 0x2ebaebaf / 0x80000000 = 36.51% +; CHECK: edge for.body -> if.then3 probability is 0x51292fa6 / 0x80000000 = 63.41% +; CHECK: edge for.body -> if.end probability is 0x2ed6d05a / 0x80000000 = 36.59% if.then3: ; preds = %for.body %8 = load i32, i32* %x.addr, align 4, !dbg !46 @@ -100,8 +100,8 @@ %div4 = sdiv i64 %10, 4, !dbg !51 %cmp5 = icmp sgt i64 %9, %div4, !dbg !52 br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53 -; CHECK: edge if.end -> if.then6 probability is 0x5dbaa1dc / 0x80000000 = 73.23% -; CHECK: edge if.end -> if.else7 probability is 0x22455e24 / 0x80000000 = 26.77% +; CHECK: edge if.end -> if.then6 probability is 0x5d89d89e / 0x80000000 = 73.08% +; CHECK: edge if.end -> if.else7 probability is 0x22762762 / 0x80000000 = 26.92% if.then6: ; preds = %if.end %11 = load i32, i32* %y.addr, align 4, !dbg !54 @@ -121,8 +121,8 @@ %13 = load i64, i64* %j, align 8, !dbg !64 %cmp9 = icmp slt i64 %13, 100, !dbg !67 br i1 %cmp9, label %for.body10, label %for.end, !dbg !68 -; CHECK: edge for.cond8 -> for.body10 probability is 0x7e985735 / 0x80000000 = 98.90% [HOT edge] -; CHECK: edge for.cond8 -> for.end probability is 0x0167a8cb / 0x80000000 = 1.10% +; CHECK: edge for.cond8 -> for.body10 probability is 0x7e941a89 / 0x80000000 = 98.89% [HOT edge] +; CHECK: edge for.cond8 -> for.end probability is 0x016be577 / 0x80000000 = 1.11% for.body10: ; preds = %for.cond8