Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -18,12 +18,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -50,7 +52,10 @@ truncated_name_table, not_implemented, counter_overflow, - ostream_seek_unsupported + ostream_seek_unsupported, + compress_failed, + uncompress_failed, + zlib_unavailable }; inline std::error_code make_error_code(sampleprof_error E) { @@ -114,6 +119,7 @@ SecInValid = 0, SecProfSummary = 1, SecNameTable = 2, + SecProfileSymbolList = 3, // marker for the first type of profile. SecFuncProfileFirst = 32, SecLBRProfile = SecFuncProfileFirst @@ -595,6 +601,49 @@ SamplesWithLocList V; }; +/// ProfileSymbolList records the list of function symbols that show up +/// in the binary used to generate the profile. It is useful to +/// discriminate a function being so cold as not to show up +/// in the profile from a function newly added. +class ProfileSymbolList { +public: + /// \p copy selects whether \p Name is copied into Allocator; if false, + /// the caller must keep the memory behind \p Name alive. 
+ void add(StringRef Name, bool copy = false) { + if (!copy) { + Syms.insert(Name); + return; + } + Syms.insert(Name.copy(Allocator)); + } + + /// Return true if \p Name is in the list. + bool contains(StringRef Name) const { return Syms.count(Name); } + + /// Add every symbol of \p List, copying each name into Allocator. + void merge(const ProfileSymbolList &List) { + for (auto Sym : List.Syms) + add(Sym, true); + } + + unsigned size() const { return Syms.size(); } + + void setToCompress(bool TC) { ToCompress = TC; } + + std::error_code read(uint64_t CompressSize, uint64_t UncompressSize, + const uint8_t *Data); + std::error_code write(raw_ostream &OS); + void dump(raw_ostream &OS = dbgs()) const; + +private: + // Determine whether or not to compress the symbol list when + // writing it into profile. The variable is unused when the symbol + // list is read from an existing profile. + bool ToCompress = true; + DenseSet<StringRef> Syms; + BumpPtrAllocator Allocator; +}; + } // end namespace sampleprof } // end namespace llvm Index: include/llvm/ProfileData/SampleProfReader.h =================================================================== --- include/llvm/ProfileData/SampleProfReader.h +++ include/llvm/ProfileData/SampleProfReader.h @@ -326,6 +326,10 @@ /// \brief Return the profile format. SampleProfileFormat getFormat() { return Format; } + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() { + return nullptr; + }; + protected: /// Map every function to its associated profile. 
/// @@ -477,6 +481,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { protected: std::vector SecHdrTable; + std::unique_ptr<ProfileSymbolList> ProfSymList; std::error_code readSecHdrTableEntry(); std::error_code readSecHdrTable(); virtual std::error_code readHeader() override; @@ -498,6 +503,7 @@ virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, SecType Type) override; + std::error_code readProfileSymbolList(); public: SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, @@ -506,6 +512,10 @@ /// \brief Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override { + return std::move(ProfSymList); + }; }; class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { Index: include/llvm/ProfileData/SampleProfWriter.h =================================================================== --- include/llvm/ProfileData/SampleProfWriter.h +++ include/llvm/ProfileData/SampleProfWriter.h @@ -56,6 +56,8 @@ static ErrorOr> create(std::unique_ptr &OS, SampleProfileFormat Format); + virtual void setProfileSymbolList(ProfileSymbolList *PSL) {} + protected: SampleProfileWriter(std::unique_ptr &OS) : OutputStream(std::move(OS)) {} @@ -175,12 +177,19 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { using SampleProfileWriterExtBinaryBase::SampleProfileWriterExtBinaryBase; +public: + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { + ProfSymList = PSL; + }; + private: virtual void initSectionLayout() override { - SectionLayout = {SecProfSummary, SecNameTable, SecLBRProfile}; + SectionLayout = {SecProfSummary, SecNameTable, SecLBRProfile, + SecProfileSymbolList}; }; virtual std::error_code writeSections(const StringMap &ProfileMap) override; + ProfileSymbolList *ProfSymList = nullptr; }; // 
CompactBinary is a compact format of binary profile which both reduces Index: lib/ProfileData/SampleProf.cpp =================================================================== --- lib/ProfileData/SampleProf.cpp +++ lib/ProfileData/SampleProf.cpp @@ -15,8 +15,11 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include @@ -66,6 +69,12 @@ return "Counter overflow"; case sampleprof_error::ostream_seek_unsupported: return "Ostream does not support seek"; + case sampleprof_error::compress_failed: + return "Compress failure"; + case sampleprof_error::uncompress_failed: + return "Uncompress failure"; + case sampleprof_error::zlib_unavailable: + return "Zlib is unavailable"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -188,3 +197,86 @@ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif + +/// Read the symbol list stored at \p Data. A non-zero \p CompressSize means +/// the payload is zlib-compressed and inflates to \p UncompressSize bytes. +std::error_code ProfileSymbolList::read(uint64_t CompressSize, + uint64_t UncompressSize, + const uint8_t *Data) { + const char *ListStart = reinterpret_cast<const char *>(Data); + // CompressSize being non-zero means the profile is compressed and + // needs to be uncompressed first. 
+ if (CompressSize) { + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + + StringRef CompressedStrings(reinterpret_cast<const char *>(Data), + CompressSize); + char *Buffer = Allocator.Allocate<char>(UncompressSize); + // zlib::uncompress takes a size_t &, which need not be the same type as + // uint64_t on every host. + size_t UCSize = UncompressSize; + llvm::Error E = zlib::uncompress(CompressedStrings, Buffer, UCSize); + // errorToBool consumes E (an unchecked failure aborts in checked builds). + // A short result would leave the tail of Buffer uninitialized. + if (errorToBool(std::move(E)) || UCSize != UncompressSize) + return sampleprof_error::uncompress_failed; + ListStart = Buffer; + } + + uint64_t Size = 0; + while (Size < UncompressSize) { + StringRef Str(ListStart + Size); + add(Str); + Size += Str.size() + 1; + } + return sampleprof_error::success; +} + +/// Write the list to \p OS as: ULEB128 uncompressed size, ULEB128 compressed +/// size (0 if not compressed), then the sorted NUL-terminated names, which +/// are zlib-compressed when ToCompress is set. +std::error_code ProfileSymbolList::write(raw_ostream &OS) { + // Sort the symbols before doing compression. It will make the + // compression much more effective. + std::vector<StringRef> SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + std::string UncompressedStrings; + for (auto &Sym : SortedList) { + UncompressedStrings.append(Sym.str()); + UncompressedStrings.append(1, '\0'); + } + + if (ToCompress) { + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + SmallString<128> CompressedStrings; + llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings, + zlib::BestSizeCompression); + // errorToBool consumes E (an unchecked failure aborts in checked builds). + if (errorToBool(std::move(E))) + return sampleprof_error::compress_failed; + encodeULEB128(UncompressedStrings.size(), OS); + encodeULEB128(CompressedStrings.size(), OS); + OS << CompressedStrings.str(); + } else { + encodeULEB128(UncompressedStrings.size(), OS); + // If profile symbol list is not compressed, we will still save + // a compressed size value, but the value of the size is 0. 
+ encodeULEB128(0, OS); + OS << UncompressedStrings; + } + return sampleprof_error::success; +} + +void ProfileSymbolList::dump(raw_ostream &OS) const { + OS << "======== Dump profile symbol list ========\n"; + std::vector<StringRef> SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + for (auto &Sym : SortedList) + OS << Sym << "\n"; +} Index: lib/ProfileData/SampleProfReader.cpp =================================================================== --- lib/ProfileData/SampleProfReader.cpp +++ lib/ProfileData/SampleProfReader.cpp @@ -486,12 +486,44 @@ return EC; } break; + case SecProfileSymbolList: + if (std::error_code EC = readProfileSymbolList()) + return EC; + break; default: break; } return sampleprof_error::success; } +/// Read a SecProfileSymbolList section: the uncompressed and compressed sizes +/// followed by the symbol list payload, then advance Data past the payload. +std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() { + auto UncompressSize = readNumber<uint64_t>(); + if (std::error_code EC = UncompressSize.getError()) + return EC; + + auto CompressSize = readNumber<uint64_t>(); + if (std::error_code EC = CompressSize.getError()) + return EC; + + // NOTE(review): both sizes come straight from the profile and are not + // checked against the section bounds here -- confirm a caller does so. + if (!ProfSymList) + ProfSymList = std::make_unique<ProfileSymbolList>(); + + if (std::error_code EC = + ProfSymList->read(*CompressSize, *UncompressSize, Data)) + return EC; + + // CompressSize is zero only when ProfileSymbolList is not compressed. 
+ if (*CompressSize == 0) + Data = Data + *UncompressSize; + else + Data = Data + *CompressSize; + return sampleprof_error::success; +} + std::error_code SampleProfileReaderExtBinaryBase::read() { const uint8_t *BufStart = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); Index: lib/ProfileData/SampleProfWriter.cpp =================================================================== --- lib/ProfileData/SampleProfWriter.cpp +++ lib/ProfileData/SampleProfWriter.cpp @@ -121,7 +121,12 @@ if (std::error_code EC = writeFuncProfiles(ProfileMap)) return EC; - addNewSection(SecLBRProfile, SectionStart); + SectionStart = addNewSection(SecLBRProfile, SectionStart); + + if (ProfSymList) + if (std::error_code EC = ProfSymList->write(*OutputStream)) + return EC; + addNewSection(SecProfileSymbolList, SectionStart); return sampleprof_error::success; } Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -379,6 +379,10 @@ /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; + /// Profile Symbol list tells whether a function name appears in the binary + /// used to generate the current profile. + std::unique_ptr<ProfileSymbolList> PSL; + /// Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -1634,6 +1638,7 @@ Reader = std::move(ReaderOrErr.get()); Reader->collectFuncsToUse(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); + PSL = Reader->getProfileSymbolList(); if (!RemappingFilename.empty()) { // Apply profile remappings to the loaded profile data if requested. @@ -1725,11 +1730,15 @@ // conservatively by getEntryCount as the same as unknown (None). This is // to avoid newly added code to be treated as cold. If we have samples // this will be overwritten in emitAnnotations. 
+ // + // PSL -- profile symbol list includes all the symbols in the sampled binary. // If ProfileSampleAccurate is true or F has profile-sample-accurate - // attribute, initialize the entry count to 0 so callsites or functions - // unsampled will be treated as cold. + // attribute, and if there is no profile symbol list read in, initialize + // all the function entry counts to 0; if there is a profile symbol list, only + // initialize the entry count to 0 when the current function is in the list. uint64_t initialEntryCount = - (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) + ((ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) && + (!PSL || PSL->contains(F.getName()))) ? 0 : -1; F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); Index: test/Transforms/SampleProfile/Inputs/profile-symbol-list.text =================================================================== --- test/Transforms/SampleProfile/Inputs/profile-symbol-list.text +++ test/Transforms/SampleProfile/Inputs/profile-symbol-list.text @@ -0,0 +1,9 @@ +_Z3goov +_Z3sumii +__libc_csu_fini +__libc_csu_init +_dl_relocate_static_pie +_fini +_init +_start +main Index: test/Transforms/SampleProfile/compressed-profile-symbol-list.ll =================================================================== --- test/Transforms/SampleProfile/compressed-profile-symbol-list.ll +++ test/Transforms/SampleProfile/compressed-profile-symbol-list.ll @@ -0,0 +1,140 @@ +; REQUIRES: zlib +; Append inline.prof with profile symbol list and save it after compression. 
+; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=true --extbinary %S/Inputs/inline.prof --output=%t.profdata +; RUN: opt < %s -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %s + +; Original C++ test case +; +; #include +; +; __attribute__((noinline)) int goo() { return 3 }; +; __attribute__((noinline)) int hoo() { return 4 }; +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return goo() + hoo() != 7; +; } +; +; Both goo and hoo don't show up in the input profile. +; Suppose function goo shows up in the binary generating the input profile +; and function hoo doesn't show up. Then the profile symbol list in the input +; profile will contain goo but not hoo. Verify the entry count of goo is +; 0 and the entry count of hoo is -1. 
+; CHECK: define {{.*}} i32 @_Z3goov() {{.*}} !prof ![[IDX1:[0-9]*]] +; CHECK: define {{.*}} i32 @_Z3hoov() {{.*}} !prof ![[IDX2:[0-9]*]] +; CHECK: ![[IDX1]] = !{!"function_entry_count", i64 0} +; CHECK: ![[IDX2]] = !{!"function_entry_count", i64 -1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: noinline norecurse nounwind readnone uwtable +define dso_local i32 @_Z3goov() local_unnamed_addr #0 !dbg !7 { +entry: + ret i32 3, !dbg !9 +} + +; Function Attrs: noinline norecurse nounwind readnone uwtable +define dso_local i32 @_Z3hoov() local_unnamed_addr #0 !dbg !10 { +entry: + ret i32 4, !dbg !11 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @_Z3sumii(i32 %x, i32 %y) local_unnamed_addr #1 !dbg !12 { +entry: + %add = add nsw i32 %y, %x, !dbg !13 + ret i32 %add, !dbg !14 +} + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #2 !dbg !15 { +entry: + br label %while.body, !dbg !16 + +while.body: ; preds = %while.body, %entry + %inc12 = phi i32 [ 1, %entry ], [ %inc.4, %while.body ] + %s.011 = phi i32 [ undef, %entry ], [ %spec.select.4, %while.body ] + %cmp1 = icmp eq i32 %inc12, 100, !dbg !18 + %add.i = add nsw i32 %inc12, %s.011, !dbg !20 + %spec.select = select i1 %cmp1, i32 30, i32 %add.i, !dbg !23 + %inc = add nuw nsw i32 %inc12, 1, !dbg !24 + %cmp1.1 = icmp eq i32 %inc, 100, !dbg !18 + %add.i.1 = add nsw i32 %inc, %spec.select, !dbg !20 + %spec.select.1 = select i1 %cmp1.1, i32 30, i32 %add.i.1, !dbg !23 + %inc.1 = add nuw nsw i32 %inc12, 2, !dbg !24 + %cmp1.2 = icmp eq i32 %inc.1, 100, !dbg !18 + %add.i.2 = add nsw i32 %inc.1, %spec.select.1, !dbg !20 + %spec.select.2 = select i1 %cmp1.2, i32 30, i32 %add.i.2, !dbg !23 + %inc.2 = add nuw nsw i32 %inc12, 3, !dbg !24 + %cmp1.3 = icmp eq i32 %inc.2, 100, !dbg !18 + %add.i.3 
= add nsw i32 %inc.2, %spec.select.2, !dbg !20 + %spec.select.3 = select i1 %cmp1.3, i32 30, i32 %add.i.3, !dbg !23 + %inc.3 = add nuw nsw i32 %inc12, 4, !dbg !24 + %cmp1.4 = icmp eq i32 %inc.3, 100, !dbg !18 + %add.i.4 = add nsw i32 %inc.3, %spec.select.3, !dbg !20 + %spec.select.4 = select i1 %cmp1.4, i32 30, i32 %add.i.4, !dbg !23 + %inc.4 = add nuw nsw i32 %inc12, 5, !dbg !24 + %exitcond.4 = icmp eq i32 %inc.4, 400000001, !dbg !26 + br i1 %exitcond.4, label %while.end, label %while.body, !dbg !27, !llvm.loop !28 + +while.end: ; preds = %while.body + %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %spec.select.4), !dbg !31 + ret i32 0, !dbg !32 +} + +; Function Attrs: nofree nounwind +declare dso_local i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #3 + +attributes #0 = { noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0 (trunk 369144)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "1.cc", directory: "/usr/local/google/home/wmi/workarea/llvm-r369144/src") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 (trunk 369144)"} +!7 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 3, column: 39, scope: !7) +!10 = distinct !DISubprogram(name: "hoo", linkageName: "_Z3hoov", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = 
!DILocation(line: 4, column: 39, scope: !10) +!12 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 7, column: 12, scope: !12) +!14 = !DILocation(line: 7, column: 3, scope: !12) +!15 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!16 = !DILocation(line: 12, column: 3, scope: !17) +!17 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2) +!18 = !DILocation(line: 13, column: 11, scope: !19) +!19 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 21) +!20 = !DILocation(line: 7, column: 12, scope: !21, inlinedAt: !22) +!21 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 21) +!22 = distinct !DILocation(line: 13, column: 23, scope: !17) +!23 = !DILocation(line: 13, column: 9, scope: !19) +!24 = !DILocation(line: 12, column: 11, scope: !25) +!25 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 1282) +!26 = !DILocation(line: 12, column: 14, scope: !25) +!27 = !DILocation(line: 12, column: 3, scope: !25) +!28 = distinct !{!28, !29, !30} +!29 = !DILocation(line: 12, column: 3, scope: !15) +!30 = !DILocation(line: 13, column: 43, scope: !15) +!31 = !DILocation(line: 14, column: 3, scope: !15) +!32 = !DILocation(line: 15, column: 3, scope: !15) Index: test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll =================================================================== --- test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll +++ test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll @@ -0,0 +1,139 @@ +; Append inline.prof with profile symbol list and save it without compression. 
+; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=false --extbinary %S/Inputs/inline.prof --output=%t.profdata +; RUN: opt < %s -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %s + +; Original C++ test case +; +; #include +; +; __attribute__((noinline)) int goo() { return 3 }; +; __attribute__((noinline)) int hoo() { return 4 }; +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return goo() + hoo() != 7; +; } +; +; Both goo and hoo don't show up in the input profile. +; Suppose function goo shows up in the binary generating the input profile +; and function hoo doesn't show up. Then the profile symbol list in the input +; profile will contain goo but not hoo. Verify the entry count of goo is +; 0 and the entry count of hoo is -1. 
+; CHECK: define {{.*}} i32 @_Z3goov() {{.*}} !prof ![[IDX1:[0-9]*]] +; CHECK: define {{.*}} i32 @_Z3hoov() {{.*}} !prof ![[IDX2:[0-9]*]] +; CHECK: ![[IDX1]] = !{!"function_entry_count", i64 0} +; CHECK: ![[IDX2]] = !{!"function_entry_count", i64 -1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: noinline norecurse nounwind readnone uwtable +define dso_local i32 @_Z3goov() local_unnamed_addr #0 !dbg !7 { +entry: + ret i32 3, !dbg !9 +} + +; Function Attrs: noinline norecurse nounwind readnone uwtable +define dso_local i32 @_Z3hoov() local_unnamed_addr #0 !dbg !10 { +entry: + ret i32 4, !dbg !11 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @_Z3sumii(i32 %x, i32 %y) local_unnamed_addr #1 !dbg !12 { +entry: + %add = add nsw i32 %y, %x, !dbg !13 + ret i32 %add, !dbg !14 +} + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #2 !dbg !15 { +entry: + br label %while.body, !dbg !16 + +while.body: ; preds = %while.body, %entry + %inc12 = phi i32 [ 1, %entry ], [ %inc.4, %while.body ] + %s.011 = phi i32 [ undef, %entry ], [ %spec.select.4, %while.body ] + %cmp1 = icmp eq i32 %inc12, 100, !dbg !18 + %add.i = add nsw i32 %inc12, %s.011, !dbg !20 + %spec.select = select i1 %cmp1, i32 30, i32 %add.i, !dbg !23 + %inc = add nuw nsw i32 %inc12, 1, !dbg !24 + %cmp1.1 = icmp eq i32 %inc, 100, !dbg !18 + %add.i.1 = add nsw i32 %inc, %spec.select, !dbg !20 + %spec.select.1 = select i1 %cmp1.1, i32 30, i32 %add.i.1, !dbg !23 + %inc.1 = add nuw nsw i32 %inc12, 2, !dbg !24 + %cmp1.2 = icmp eq i32 %inc.1, 100, !dbg !18 + %add.i.2 = add nsw i32 %inc.1, %spec.select.1, !dbg !20 + %spec.select.2 = select i1 %cmp1.2, i32 30, i32 %add.i.2, !dbg !23 + %inc.2 = add nuw nsw i32 %inc12, 3, !dbg !24 + %cmp1.3 = icmp eq i32 %inc.2, 100, !dbg !18 + %add.i.3 
= add nsw i32 %inc.2, %spec.select.2, !dbg !20 + %spec.select.3 = select i1 %cmp1.3, i32 30, i32 %add.i.3, !dbg !23 + %inc.3 = add nuw nsw i32 %inc12, 4, !dbg !24 + %cmp1.4 = icmp eq i32 %inc.3, 100, !dbg !18 + %add.i.4 = add nsw i32 %inc.3, %spec.select.3, !dbg !20 + %spec.select.4 = select i1 %cmp1.4, i32 30, i32 %add.i.4, !dbg !23 + %inc.4 = add nuw nsw i32 %inc12, 5, !dbg !24 + %exitcond.4 = icmp eq i32 %inc.4, 400000001, !dbg !26 + br i1 %exitcond.4, label %while.end, label %while.body, !dbg !27, !llvm.loop !28 + +while.end: ; preds = %while.body + %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %spec.select.4), !dbg !31 + ret i32 0, !dbg !32 +} + +; Function Attrs: nofree nounwind +declare dso_local i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #3 + +attributes #0 = { noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0 (trunk 369144)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "1.cc", directory: "/usr/local/google/home/wmi/workarea/llvm-r369144/src") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 (trunk 369144)"} +!7 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 3, column: 39, scope: !7) +!10 = distinct !DISubprogram(name: "hoo", linkageName: "_Z3hoov", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = 
!DILocation(line: 4, column: 39, scope: !10) +!12 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 7, column: 12, scope: !12) +!14 = !DILocation(line: 7, column: 3, scope: !12) +!15 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!16 = !DILocation(line: 12, column: 3, scope: !17) +!17 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2) +!18 = !DILocation(line: 13, column: 11, scope: !19) +!19 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 21) +!20 = !DILocation(line: 7, column: 12, scope: !21, inlinedAt: !22) +!21 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 21) +!22 = distinct !DILocation(line: 13, column: 23, scope: !17) +!23 = !DILocation(line: 13, column: 9, scope: !19) +!24 = !DILocation(line: 12, column: 11, scope: !25) +!25 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 1282) +!26 = !DILocation(line: 12, column: 14, scope: !25) +!27 = !DILocation(line: 12, column: 3, scope: !25) +!28 = distinct !{!28, !29, !30} +!29 = !DILocation(line: 12, column: 3, scope: !15) +!30 = !DILocation(line: 13, column: 43, scope: !15) +!31 = !DILocation(line: 14, column: 3, scope: !15) +!32 = !DILocation(line: 15, column: 3, scope: !15) Index: test/tools/llvm-profdata/Inputs/profile-symbol-list-1.text =================================================================== --- test/tools/llvm-profdata/Inputs/profile-symbol-list-1.text +++ test/tools/llvm-profdata/Inputs/profile-symbol-list-1.text @@ -0,0 +1,5 @@ +_Z3goov +_Z3sumii +__libc_csu_fini +__libc_csu_init +_dl_relocate_static_pie Index: test/tools/llvm-profdata/Inputs/profile-symbol-list-2.text 
=================================================================== --- test/tools/llvm-profdata/Inputs/profile-symbol-list-2.text +++ test/tools/llvm-profdata/Inputs/profile-symbol-list-2.text @@ -0,0 +1,4 @@ +_fini +_init +_start +main Index: test/tools/llvm-profdata/Inputs/profile-symbol-list.expected =================================================================== --- test/tools/llvm-profdata/Inputs/profile-symbol-list.expected +++ test/tools/llvm-profdata/Inputs/profile-symbol-list.expected @@ -0,0 +1,42 @@ +Function: main: 368038, 0, 7 sampled lines +Samples collected in the function's body { + 4: 1068 + 4.2: 1068 + 5: 2150 + 5.1: 2150 + 6: 4160 + 7: 1068 + 9: 4128, calls: _Z3bari:2942 _Z3fooi:1262 +} +Samples collected in inlined callsites { + 10: inlined callee: inline1: 2000, 0, 1 sampled lines + Samples collected in the function's body { + 1: 2000 + } + No inlined callsites in this function + 10: inlined callee: inline2: 4000, 0, 1 sampled lines + Samples collected in the function's body { + 1: 4000 + } + No inlined callsites in this function +} +Function: _Z3fooi: 15422, 1220, 1 sampled lines +Samples collected in the function's body { + 1: 1220 +} +No inlined callsites in this function +Function: _Z3bari: 40602, 2874, 1 sampled lines +Samples collected in the function's body { + 1: 2874 +} +No inlined callsites in this function +======== Dump profile symbol list ======== +_Z3goov +_Z3sumii +__libc_csu_fini +__libc_csu_init +_dl_relocate_static_pie +_fini +_init +_start +main Index: test/tools/llvm-profdata/profile-symbol-list.test =================================================================== --- test/tools/llvm-profdata/profile-symbol-list.test +++ test/tools/llvm-profdata/profile-symbol-list.test @@ -0,0 +1,5 @@ +; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile.proftext -o %t.1.output +; RUN: llvm-profdata merge -sample -extbinary 
-prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile.proftext -o %t.2.output +; RUN: llvm-profdata merge -sample -extbinary %t.1.output %t.2.output -o %t.3.output +; RUN: llvm-profdata show -sample -show-prof-sym-list %t.3.output > %t.4.output +; RUN: diff %S/Inputs/profile-symbol-list.expected %t.4.output Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -433,14 +433,40 @@ sampleprof::SPF_GCC, sampleprof::SPF_Binary}; -static void mergeSampleProfile(const WeightedFileVector &Inputs, - SymbolRemapper *Remapper, - StringRef OutputFilename, - ProfileFormat OutputFormat) { +static std::unique_ptr +getInputFileBuf(const StringRef &InputFile) { + if (InputFile == "") + return {}; + + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); + if (!BufOrError) + exitWithErrorCode(BufOrError.getError(), InputFile); + + return std::move(*BufOrError); +} + +static void populateProfileSymbolList(MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &PSL) { + if (!Buffer) + return; + + SmallVector SymbolVec; + StringRef Data = Buffer->getBuffer(); + Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + for (StringRef symbol : SymbolVec) + PSL.add(symbol); +} + +static void +mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, + StringRef OutputFilename, ProfileFormat OutputFormat, + StringRef ProfileSymbolListFile, bool CompressProfSymList) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; LLVMContext Context; + sampleprof::ProfileSymbolList WriterList; for (const auto &Input : Inputs) { auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context); if (std::error_code EC = ReaderOrErr.getError()) @@ -471,13 +497,28 @@ handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName); } } + + std::unique_ptr ReaderList = + 
Reader->getProfileSymbolList(); + if (ReaderList) + WriterList.merge(*ReaderList); } auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); if (std::error_code EC = WriterOrErr.getError()) exitWithErrorCode(EC, OutputFilename); + // WriterList will have StringRef referring to string in Buffer. + // Make sure Buffer lives as long as WriterList. + auto Buffer = getInputFileBuf(ProfileSymbolListFile); + populateProfileSymbolList(Buffer.get(), WriterList); + WriterList.setToCompress(CompressProfSymList); + if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) + warn("Profile Symbol list is not empty but the output format is not " + "ExtBinary format. The list will be lost in the output. "); + auto Writer = std::move(WriterOrErr.get()); + Writer->setProfileSymbolList(&WriterList); Writer->write(ProfileMap); } @@ -492,18 +533,6 @@ return {FileName, Weight}; } -static std::unique_ptr -getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) { - if (InputFilenamesFile == "") - return {}; - - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile); - if (!BufOrError) - exitWithErrorCode(BufOrError.getError(), InputFilenamesFile); - - return std::move(*BufOrError); -} - static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { StringRef Filename = WF.Filename; uint64_t Weight = WF.Weight; @@ -603,6 +632,13 @@ cl::desc("Number of merge threads to use (default: autodetect)")); cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), cl::aliasopt(NumThreads)); + cl::opt ProfileSymbolListFile( + "prof-sym-list", cl::init(""), + cl::desc("Path to file containing the list of function symbols " + "used to populate profile symbol list")); + cl::opt CompressProfSymList( + "compress-prof-sym-list", cl::init(true), cl::Hidden, + cl::desc("Compress profile symbol list before write it into profile. 
")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -614,7 +650,7 @@ // Make sure that the file buffer stays alive for the duration of the // weighted input vector's lifetime. - auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile); + auto Buffer = getInputFileBuf(InputFilenamesFile); parseInputFilenamesFile(Buffer.get(), WeightedInputs); if (WeightedInputs.empty()) @@ -636,7 +672,8 @@ OutputFormat, OutputSparse, NumThreads); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat); + OutputFormat, ProfileSymbolListFile, + CompressProfSymList); return 0; } @@ -954,7 +991,7 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, bool ShowAllFunctions, const std::string &ShowFunction, - raw_fd_ostream &OS) { + bool ShowProfileSymbolList, raw_fd_ostream &OS) { using namespace sampleprof; LLVMContext Context; auto ReaderOrErr = SampleProfileReader::create(Filename, Context); @@ -970,6 +1007,12 @@ else Reader->dumpFunctionProfile(ShowFunction, OS); + if (ShowProfileSymbolList) { + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + ReaderList->dump(OS); + } + return 0; } @@ -1022,6 +1065,10 @@ "list-below-cutoff", cl::init(false), cl::desc("Only output names of functions whose max count values are " "below the cutoff value")); + cl::opt ShowProfileSymbolList( + "show-prof-sym-list", cl::init(false), + cl::desc("Show profile symbol list if it exists in the profile. 
")); + cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); if (OutputFilename.empty()) @@ -1049,7 +1096,7 @@ OnlyListBelow, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, - ShowFunction, OS); + ShowFunction, ShowProfileSymbolList, OS); } int main(int argc, const char *argv[]) { Index: unittests/ProfileData/SampleProfTest.cpp =================================================================== --- unittests/ProfileData/SampleProfTest.cpp +++ unittests/ProfileData/SampleProfTest.cpp @@ -96,6 +96,13 @@ Profiles[FooName] = std::move(FooSamples); Profiles[BarName] = std::move(BarSamples); + ProfileSymbolList List; + if (Format == SampleProfileFormat::SPF_Ext_Binary) { + List.add("zoo", true); + List.add("moo", true); + } + Writer->setProfileSymbolList(&List); + std::error_code EC; EC = Writer->write(Profiles); ASSERT_TRUE(NoError(EC)); @@ -107,6 +114,13 @@ EC = Reader->read(); ASSERT_TRUE(NoError(EC)); + if (Format == SampleProfileFormat::SPF_Ext_Binary) { + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + ReaderList->contains("zoo"); + ReaderList->contains("moo"); + } + if (Remap) { auto MemBuffer = llvm::MemoryBuffer::getMemBuffer(R"( # Types 'int' and 'long' are equivalent