diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -20,6 +20,8 @@ * :ref:`merge <profdata-merge>` * :ref:`show <profdata-show>` * :ref:`overlap <profdata-overlap>` +* :ref:`intersect <profdata-intersect>` +* :ref:`exclude <profdata-exclude>` .. program:: llvm-profdata merge @@ -370,6 +372,92 @@ Only show overlap for the context sensitive profile counts. The default is to show non-context sensitive profile counts. +.. _profdata-intersect: + +INTERSECT +--------- + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-profdata intersect` [*options*] [*base profile file*] [*test profile file*] + +DESCRIPTION +^^^^^^^^^^^ + +:program:`llvm-profdata intersect` takes two indexed profile data files and +produces a single indexed profile data file which contains the *intersection* of +the two given profile data files. + +For example, if *base profile file* has counts of {400, 600}, and +*test profile file* has matched counts of {60000, 40000}, the *output profile* +is {400, 600}. + +OPTIONS +^^^^^^^ + +.. option:: -help + + Print a summary of command line options. + +.. option:: -output=output, -o=output + + Specify the output file name. *Output* cannot be ``-`` as the resulting + indexed profile data can't be written to standard output. + + .. option:: -binary (default) + + Emit the profile using a binary encoding. + + .. option:: -text + + Emit the profile in text mode. When this option is used the profile will be + dumped in the text format that is parsable by the profile reader. + +.. _profdata-exclude: + +EXCLUDE +------- + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-profdata exclude` [*options*] [*base profile file*] [*test profile file*] + +DESCRIPTION +^^^^^^^^^^^ + +:program:`llvm-profdata exclude` takes two indexed profile data files and +produces a single indexed profile data file. The *output profile* contains the +profile counts of *base profile file* that have no matching counts or zero +counts in *test profile file*. 
+ +For example, if *base profile file* has counts of {10, 10, 10, 10, 10}, +and *test profile file* has matched counts of {0, 10, 1} for the first three +counts in *base profile file* but no matched count for the remainder, the +*output profile* is {10, 0, 0, 10, 10}. + +OPTIONS +^^^^^^^ + +.. option:: -help + + Print a summary of command line options. + +.. option:: -output=output, -o=output + + Specify the output file name. *Output* cannot be ``-`` as the resulting + indexed profile data can't be written to standard output. + + .. option:: -binary (default) + + Emit the profile using a binary encoding. + + .. option:: -text + + Emit the profile in text mode. When this option is used the profile will be + dumped in the text format that is parsable by the profile reader. + EXIT STATUS ----------- diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -692,6 +692,14 @@ /// Compute the overlap b/w this record and Input record. void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); + + /// Reduce this record to the intersection of this and \p Input record. + void intersect(InstrProfValueSiteRecord &Input, + function_ref<void(instrprof_error)> Warn); + + /// Zero the counts of this record's values found in \p Input record. + void exclude(InstrProfValueSiteRecord &Input, + function_ref<void(instrprof_error)> Warn); }; /// Profiling information for a single function. @@ -792,6 +800,15 @@ OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); + /// Reduce this record to the intersection of this one and \p Other. + void intersect(InstrProfRecord &Other, + function_ref<void(instrprof_error)> Warn); + + /// Reduce this record such that it excludes any profile data + /// found in \p Other. 
+ void exclude(InstrProfRecord &Other, + function_ref<void(instrprof_error)> Warn); + private: struct ValueProfData { std::vector<InstrProfValueSiteRecord> IndirectCallSites; @@ -850,6 +867,12 @@ // Scale up value profile data count by N (Numerator) / D (Denominator). void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); + + enum ReduceOp { Intersect, Exclude }; + // Apply \p Op against \p Src to this record's value sites of \p ValueKind. + void reduceValueProfData(uint32_t ValueKind, InstrProfRecord &Src, + ReduceOp Op, + function_ref<void(instrprof_error)> Warn); }; struct NamedInstrProfRecord : InstrProfRecord { diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -635,6 +635,41 @@ } } +void InstrProfValueSiteRecord::intersect( + InstrProfValueSiteRecord &Input, function_ref<void(instrprof_error)> Warn) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto J = Input.ValueData.begin(); + auto JE = Input.ValueData.end(); + for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { + while (J != JE && J->Value < I->Value) + ++J; + // A target value absent from Input has an implicit count of zero there. + if (J != JE && J->Value == I->Value) + I->Count = std::min(I->Count, J->Count); + else + I->Count = 0; + } +} + +void InstrProfValueSiteRecord::exclude( + InstrProfValueSiteRecord &Input, function_ref<void(instrprof_error)> Warn) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto I = ValueData.begin(); + auto IE = ValueData.end(); + for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; + ++J) { + while (I != IE && I->Value < J->Value) + ++I; + if (I != IE && I->Value == J->Value) { + if (J->Count > 0) + I->Count = 0; + ++I; + } + } +} + // Merge Value Profile data from Src record to this record for ValueKind. // Scale merged value counts by \p Weight. 
void InstrProfRecord::mergeValueProfData( @@ -697,6 +732,66 @@ scaleValueProfData(Kind, N, D, Warn); } +void InstrProfRecord::reduceValueProfData( + uint32_t ValueKind, InstrProfRecord &Src, ReduceOp Op, + function_ref<void(instrprof_error)> Warn) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); + if (ThisNumValueSites != OtherNumValueSites) { + Warn(instrprof_error::value_site_count_mismatch); + return; + } + if (!ThisNumValueSites) + return; + std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = + getOrCreateValueSitesForKind(ValueKind); + MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords = + Src.getValueSitesForKind(ValueKind); + for (uint32_t I = 0; I < ThisNumValueSites; ++I) { + if (Op == Intersect) { + ThisSiteRecords[I].intersect(OtherSiteRecords[I], Warn); + } else { + assert(Op == Exclude && "Unknown reduce operation"); + ThisSiteRecords[I].exclude(OtherSiteRecords[I], Warn); + } + } +} + +void InstrProfRecord::intersect(InstrProfRecord &Other, + function_ref<void(instrprof_error)> Warn) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. + if (Counts.size() != Other.Counts.size()) { + Warn(instrprof_error::count_mismatch); + return; + } + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + Counts[I] = std::min(Counts[I], Other.Counts[I]); + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + reduceValueProfData(Kind, Other, Intersect, Warn); +} + +void InstrProfRecord::exclude(InstrProfRecord &Other, + function_ref<void(instrprof_error)> Warn) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. 
+ if (Counts.size() != Other.Counts.size()) { + Warn(instrprof_error::count_mismatch); + return; + } + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + if (Other.Counts[I] > 0) + Counts[I] = 0; + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + reduceValueProfData(Kind, Other, Exclude, Warn); +} + // Map indirect call target name hash to name string. uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, InstrProfSymtab *SymTab) { diff --git a/llvm/test/tools/llvm-profdata/exclude.test b/llvm/test/tools/llvm-profdata/exclude.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/exclude.test @@ -0,0 +1,27 @@ +RUN: llvm-profdata merge -o %t_ir_base.profdata %p/Inputs/overlap_1.proftext +RUN: llvm-profdata merge -o %t_ir_test.profdata %p/Inputs/overlap_2.proftext +RUN: llvm-profdata exclude -text -o %t_ir_output.proftext %t_ir_base.profdata %t_ir_test.profdata +RUN: FileCheck --input-file=%t_ir_output.proftext %s + +CHECK: :ir +CHECK: bar +CHECK: 12884901887 +CHECK: 1 +CHECK: 0 + +CHECK: bar1 +CHECK: 12884901887 +CHECK: 1 +CHECK: 100000 + +CHECK: foo +CHECK: 25571299074 +CHECK: 2 +CHECK: 40000 +CHECK: 60000 + +CHECK: main +CHECK: 29212902728 +CHECK: 2 +CHECK: 0 +CHECK: 0 diff --git a/llvm/test/tools/llvm-profdata/intersect.test b/llvm/test/tools/llvm-profdata/intersect.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/intersect.test @@ -0,0 +1,16 @@ +RUN: llvm-profdata merge -o %t_ir_base.profdata %p/Inputs/overlap_1.proftext +RUN: llvm-profdata merge -o %t_ir_test.profdata %p/Inputs/overlap_2.proftext +RUN: llvm-profdata intersect -text -o %t_ir_output.proftext %t_ir_base.profdata %t_ir_test.profdata +RUN: FileCheck --input-file=%t_ir_output.proftext %s + +CHECK: :ir +CHECK: bar +CHECK: 12884901887 +CHECK: 1 +CHECK: 10000 + +CHECK: main +CHECK: 29212902728 +CHECK: 2 +CHECK: 20000 +CHECK: 0 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp 
b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -977,6 +977,168 @@ return 0; } +static void intersectInstrProfile(const std::string &BaseFilename, + const std::string &TestFilename, + const std::string &OutputFilename, + ProfileFormat OutputFormat) { + if (OutputFilename.compare("-") == 0) + exitWithError("Cannot write indexed profdata format to stdout."); + + if (OutputFormat != PF_Binary && OutputFormat != PF_Text) + exitWithError("Unknown format is specified."); + + std::mutex ErrorLock; + SmallSet<instrprof_error, 4> WriterErrorCodes; + WriterContext WC(false, ErrorLock, WriterErrorCodes); + WeightedFile BaseInput{BaseFilename, 1}; + WeightedFile TestInput{TestFilename, 1}; + + auto BaseReaderOrErr = InstrProfReader::create(BaseInput.Filename); + if (Error E = BaseReaderOrErr.takeError()) { + exitWithError(std::move(E), "Error in reading base profile data"); + } + auto TestReaderOrErr = IndexedInstrProfReader::create(TestInput.Filename); + if (Error E = TestReaderOrErr.takeError()) { + exitWithError(std::move(E), "Error in reading test profile data"); + } + + auto BaseReader = std::move(BaseReaderOrErr.get()); + auto TestReader = std::move(TestReaderOrErr.get()); + + bool IsIRProfile = BaseReader->isIRLevelProfile(); + bool HasCSIRProfile = BaseReader->hasCSIRLevelProfile(); + if (Error E = WC.Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) + exitWithError(std::move(E), + "Mixing IR generated profile with Clang generated profile."); + WC.Writer.setInstrEntryBBEnabled(BaseReader->instrEntryBBEnabled()); + + for (auto &I : *BaseReader) { + const StringRef FuncName = I.Name; + const auto FuncHash = I.Hash; + + auto RecordOrErr = TestReader->getInstrProfRecord(FuncName, FuncHash); + if (Error E = RecordOrErr.takeError()) { + consumeError(std::move(E)); + continue; + } + + auto Record = std::move(RecordOrErr.get()); + I.intersect(Record, [&](instrprof_error E) { + 
exitWithError(toString(make_error<InstrProfError>(E))); + }); + + WC.Writer.addRecord(std::move(I), BaseInput.Weight, + [&](Error E) { exitWithError(std::move(E)); }); + } + + writeInstrProfile(OutputFilename, OutputFormat, WC.Writer); +} + +static void excludeInstrProfile(const std::string &BaseFilename, + const std::string &TestFilename, + const std::string &OutputFilename, + ProfileFormat OutputFormat) { + if (OutputFilename.compare("-") == 0) + exitWithError("Cannot write indexed profdata format to stdout."); + + if (OutputFormat != PF_Binary && OutputFormat != PF_Text) + exitWithError("Unknown format is specified."); + + std::mutex ErrorLock; + SmallSet<instrprof_error, 4> WriterErrorCodes; + WriterContext WC(false, ErrorLock, WriterErrorCodes); + WeightedFile BaseInput{BaseFilename, 1}; + WeightedFile TestInput{TestFilename, 1}; + + auto BaseReaderOrErr = InstrProfReader::create(BaseInput.Filename); + if (Error E = BaseReaderOrErr.takeError()) { + exitWithError(std::move(E), "Error in reading base profile data"); + } + auto TestReaderOrErr = IndexedInstrProfReader::create(TestInput.Filename); + if (Error E = TestReaderOrErr.takeError()) { + exitWithError(std::move(E), "Error in reading test profile data"); + } + + auto BaseReader = std::move(BaseReaderOrErr.get()); + auto TestReader = std::move(TestReaderOrErr.get()); + + bool IsIRProfile = BaseReader->isIRLevelProfile(); + bool HasCSIRProfile = BaseReader->hasCSIRLevelProfile(); + if (Error E = WC.Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) + exitWithError(std::move(E), + "Mixing IR generated profile with Clang generated profile."); + WC.Writer.setInstrEntryBBEnabled(BaseReader->instrEntryBBEnabled()); + + for (auto &I : *BaseReader) { + const StringRef FuncName = I.Name; + const auto FuncHash = I.Hash; + + auto RecordOrErr = TestReader->getInstrProfRecord(FuncName, FuncHash); + if (Error E = RecordOrErr.takeError()) { + consumeError(std::move(E)); + } else { + auto Record = std::move(RecordOrErr.get()); + I.exclude(Record, [&](instrprof_error E) { + 
exitWithError(toString(make_error<InstrProfError>(E))); + }); + } + + WC.Writer.addRecord(std::move(I), BaseInput.Weight, + [&](Error E) { exitWithError(std::move(E)); }); + } + + writeInstrProfile(OutputFilename, OutputFormat, WC.Writer); +} + +namespace { +enum ReduceOp { intersect, exclude }; +} + +static int reduce_main(int argc, const char *argv[], ReduceOp Op) { + cl::opt<std::string> BaseFilename(cl::Positional, cl::Required, + cl::desc("<base profile file>")); + cl::opt<std::string> TestFilename(cl::Positional, cl::Required, + cl::desc("<test profile file>")); + cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), + cl::init("-"), cl::Required, + cl::desc("Output file")); + cl::alias OutputA("o", cl::desc("Alias for --output"), + cl::aliasopt(OutputFilename)); + cl::opt<ProfileFormat> OutputFormat( + cl::desc("Format of output profile"), cl::init(PF_Binary), + cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), + clEnumValN(PF_Text, "text", "Text encoding"))); + + if (Op == exclude) { + cl::ParseCommandLineOptions( + argc, argv, + "LLVM profile data tool that performs the following set operation: " + "<base profile file> NOT IN <test profile file>\n"); + + excludeInstrProfile(BaseFilename, TestFilename, OutputFilename, + OutputFormat); + } else { + assert(Op == intersect && "Unknown reduce operation"); + cl::ParseCommandLineOptions( + argc, argv, + "LLVM profile data tool that performs the following set operation: " + "<base profile file> INTERSECT <test profile file>\n"); + + intersectInstrProfile(BaseFilename, TestFilename, OutputFilename, + OutputFormat); + } + + return 0; +} + +static int intersect_main(int argc, const char *argv[]) { + return reduce_main(argc, argv, intersect); +} + +static int exclude_main(int argc, const char *argv[]) { + return reduce_main(argc, argv, exclude); +} + typedef struct ValueSitesStats { ValueSitesStats() : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0), @@ -1520,6 +1682,11 @@ func = show_main; else if (strcmp(argv[1], "overlap") == 0) func = overlap_main; + // Other useful operators for differential coverage analysis + else if (strcmp(argv[1], 
"intersect") == 0) + func = intersect_main; + else if (strcmp(argv[1], "exclude") == 0) + func = exclude_main; if (func) { std::string Invocation(ProgName.str() + " " + argv[1]);