diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -540,21 +540,36 @@ return getInstWeightImpl(Inst); } +// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight +// of non-probe instruction. So if all instructions of the BB give error_code, +// tell the inference algorithm to infer the BB weight. ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { assert(FunctionSamples::ProfileIsProbeBased && "Profile is not pseudo probe based"); Optional Probe = extractProbe(Inst); + // Ignore the non-probe instruction. If none of the instruction in the BB is + // probe, we choose to infer the BB's weight. if (!Probe) return std::error_code(); - // Ignore danling probes since they are logically deleted and should not - // consume any profile samples. + // This is not the dangling probe from the training pass but generated by the + // current compilation. Ignore this since they are logically deleted and + // should not consume any profile samples. if (Probe->isDangling()) return std::error_code(); const FunctionSamples *FS = findFunctionSamples(Inst); + // If none of the instruction has FunctionSample, we choose to return zero + // value sample to indicate the BB is cold. This could happen when the + // instruction is from inlinee and no profile data is found. + // FIXME: This should not be affected by the source drift issue as 1) if the + // newly added function is top-level inliner, it won't match the CFG checksum + // in the function profile or 2) if it's the inlinee, the inlinee should have + // a profile, otherwise it wouldn't be inlined. For non-probe based profile, + // we can improve it by adding a switch for profile-sample-block-accurate for + // block level counts in the future. if (!FS) - return std::error_code(); + return 0; // For non-CS profile, If a direct call/invoke instruction is inlined in // profile (findCalleeFunctionSamples returns non-empty result), but not