Index: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp @@ -130,6 +130,12 @@ "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +static cl::opt ProfileAccurateForSymsInList( + "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("For symbols in profile symbol list, regard their profiles to " + "be accurate. It may be overriden by profile-sample-accurate. ")); + namespace { using BlockWeightMap = DenseMap; @@ -418,8 +424,12 @@ // names, inline instance names and call target names. StringSet<> NamesInProfile; - // Showing whether ProfileSampleAccurate is enabled for current function. - bool ProfSampleAccEnabled = false; + // For symbols in profile symbol list, whether to regard their profiles + // to be accurate. It is mainly decided by existence of profile symbol + // list and -profile-accurate-for-symsinlist flag, but it can be + // overridden by -profile-sample-accurate or profile-sample-accurate + // attribute. + bool ProfAccForSymsInList; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -476,7 +486,8 @@ /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. /// -/// When profile-sample-accurate is enabled, functions without profile will +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will /// be regarded as cold and much less inlining will happen in CGSCC inlining /// pass, so we tend to lower the hot criteria here to allow more early /// inlining to happen for warm callsites and it is helpful for performance. 
@@ -487,7 +498,7 @@ assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - if (ProfSampleAccEnabled) + if (ProfAccForSymsInList) return !PSI->isColdCount(CallsiteTotalSamples); else return PSI->isHotCount(CallsiteTotalSamples); @@ -890,6 +901,14 @@ Function &F, DenseSet &InlinedGUIDs) { DenseSet PromotedInsns; + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. + assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + DenseMap localNotInlinedCallSites; bool Changed = false; while (true) { @@ -1667,7 +1686,10 @@ ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); - if (ProfileSampleAccurate) { + // While profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = + ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; + if (ProfAccForSymsInList) { NamesInProfile.clear(); if (auto NameTable = Reader->getNameTable()) NamesInProfile.insert(NameTable->begin(), NameTable->end()); @@ -1765,37 +1787,38 @@ // this will be overwritten in emitAnnotations. uint64_t initialEntryCount = -1; - ProfSampleAccEnabled = - ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"); - if (ProfSampleAccEnabled) { - // PSL -- profile symbol list include all the symbols in sampled binary. - // It is used to prevent new functions to be treated as cold. - if (PSL) { - // Profile symbol list is available, initialize the entry count to 0 - // only for functions in the list. - if (PSL->contains(F.getName())) - initialEntryCount = 0; - - // When ProfileSampleAccurate is true, function without sample will be - // regarded as cold. 
To minimize the potential negative performance - // impact it could have, we want to be a little conservative here - // saying if a function shows up in the profile, no matter as outline - // function, inline instance or call targets, treat the function as not - // being cold. This will handle the cases such as most callsites of a - // function are inlined in sampled binary but not inlined in current - // build (because of source code drift, imprecise debug information, or - // the callsites are all cold individually but not cold - // accumulatively...), so the outline function showing up as cold in - // sampled binary will actually not be cold after current build. - StringRef CanonName = FunctionSamples::getCanonicalFnName(F); - if (NamesInProfile.count(CanonName)) - initialEntryCount = -1; - } else { - // If there is no profile symbol list available, initialize all the - // function entry counts to 0. It means all the functions without - // profile will be regarded as cold. + ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; + if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { + // initialize all the function entry counts to 0. It means all the + // functions without profile will be regarded as cold. + initialEntryCount = 0; + // profile-sample-accurate is a user assertion which has a higher precedence + // than symbol list. When profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = false; + } + + // PSL -- profile symbol list include all the symbols in sampled binary. + // If ProfileAccurateForSymsInList is enabled, PSL is used to treat + // old functions without samples being cold, without having to worry + // about new and hot functions being mistakenly treated as cold. + if (ProfAccForSymsInList) { + // Initialize the entry count to 0 for functions in the list. 
+ if (PSL->contains(F.getName())) initialEntryCount = 0; - } + + // Function in the symbol list but without sample will be regarded as + // cold. To minimize the potential negative performance impact it could + // have, we want to be a little conservative here saying if a function + // shows up in the profile, no matter as outline function, inline instance + // or call targets, treat the function as not being cold. This will handle + // the cases such as most callsites of a function are inlined in sampled + // binary but not inlined in current build (because of source code drift, + // imprecise debug information, or the callsites are all cold individually + // but not cold accumulatively...), so the outline function showing up as + // cold in sampled binary will actually not be cold after current build. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (NamesInProfile.count(CanonName)) + initialEntryCount = -1; } F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); Index: llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll =================================================================== --- llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll +++ llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll @@ -1,5 +1,5 @@ ; REQUIRES: zlib ; Append inline.prof with profile symbol list and save it after compression. 
; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=true --extbinary %S/Inputs/inline.prof --output=%t.profdata -; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll -; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll +; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll +; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll Index: llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll =================================================================== --- llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll +++ llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll @@ -1,8 +1,18 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT +; RUN: opt < %s -sample-profile 
-sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT + ; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list.text %S/Inputs/profsampleacc.extbinary.afdo -o %t.symlist.afdo -; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST +; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST +; +; If -profile-accurate-for-symsinlist and -profile-sample-accurate both present, +; -profile-sample-accurate will override -profile-accurate-for-symsinlist. 
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT ; ; Original C++ test case ; @@ -22,13 +32,21 @@ ; @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 -; Check _Z3sumii's function entry count will be intialized to 0 if no profile -; symbol list is available. -; If symbol list is available, _Z3sumii's function entry count will be -; initialized to -1 if it shows up in the profile. +; Check _Z3sumii's function entry count will be 0 when +; profile-sample-accurate is enabled. +; CALL_SUM_IS_HOT: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]] ; -; CHECK: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]] +; Check _Z3sumii's function entry count will be nonzero when +; profile-sample-accurate is enabled because the callsite is warm and not +; inlined so its function entry count is adjusted to nonzero. 
+; CALL_SUM_IS_WARM: define i32 @_Z3sumii{{.*}}!prof ![[NONZERO_ID:[0-9]+]] +; +; Check _Z3sumii's function entry count will be initialized to -1 when +; profile-accurate-for-symsinlist is enabled and _Z3sumii exists in the +; profile symbol list because it also shows up in the profile as inline +; instance. ; PROFSYMLIST: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]] +; ; Function Attrs: nounwind uwtable define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { entry: @@ -65,11 +83,21 @@ br i1 %cmp1, label %if.then, label %if.else, !dbg !16 ; With the hot cutoff being set to 600000, the inline instance of _Z3sumii -; in main is neither hot nor cold. Check it will still be inlined when +; in main is neither hot nor cold. Check it won't be inlined when ; profile-sample-accurate is enabled. -; CHECK: if.then: -; CHECK-NOT: call i32 @_Z3sumii -; CHECK: if.else: +; CALL_SUM_IS_WARM: if.then: +; CALL_SUM_IS_WARM: call i32 @_Z3sumii +; CALL_SUM_IS_WARM: if.else: +; +; With the hot cutoff being set to 900000, the inline instance of _Z3sumii +; in main is hot. Check the callsite of _Z3sumii will be inlined when +; profile-sample-accurate is enabled. 
+; CALL_SUM_IS_HOT: if.then: +; CALL_SUM_IS_HOT-NOT: call i32 @_Z3sumii +; CALL_SUM_IS_HOT: if.else: +; +; Check _Z3sumii will be inlined when profile-accurate-for-symsinlist is +; enabled. ; PROFSYMLIST: if.then: ; PROFSYMLIST-NOT: call i32 @_Z3sumii ; PROFSYMLIST: if.else: @@ -99,7 +127,8 @@ !llvm.module.flags = !{!8, !9} !llvm.ident = !{!10} -; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; CALL_SUM_IS_HOT: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; CALL_SUM_IS_WARM: ![[NONZERO_ID]] = !{!"function_entry_count", i64 5179} ; PROFSYMLIST: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) !1 = !DIFile(filename: "calls.cc", directory: ".") Index: llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll =================================================================== --- llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll +++ llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll @@ -1,4 +1,4 @@ ; Append inline.prof with profile symbol list and save it without compression. 
; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=false --extbinary %S/Inputs/inline.prof --output=%t.profdata -; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll -; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll +; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll +; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll