diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -692,6 +692,19 @@
   /// Compute the overlap b/w this record and Input record.
   void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
                OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
+
+  /// Reduce this record to the intersection of this and \p Input record:
+  /// a target present in both keeps the smaller of the two counts, while a
+  /// target missing from \p Input has its count zeroed. Sorts both records
+  /// by target value. \p Warn is currently unused.
+  void intersect(InstrProfValueSiteRecord &Input,
+                 function_ref<void(instrprof_error)> Warn);
+
+  /// Exclude data from this record if found in \p Input record: a target
+  /// that \p Input recorded with a non-zero count has its count zeroed
+  /// here. Sorts both records by target value. \p Warn is currently unused.
+  void exclude(InstrProfValueSiteRecord &Input,
+               function_ref<void(instrprof_error)> Warn);
 };
 
 /// Profiling information for a single function.
@@ -792,6 +805,20 @@
                             OverlapStats &Overlap,
                             OverlapStats &FuncLevelOverlap);
 
+  /// Reduce this record to the intersection of this one and \p Other:
+  /// every counter becomes the smaller of the two counts and the value
+  /// profile data of each kind is intersected site by site. Mismatched
+  /// counter or value site counts are reported through \p Warn.
+  void intersect(InstrProfRecord &Other,
+                 function_ref<void(instrprof_error)> Warn);
+
+  /// Reduce this record such that it excludes any profile data found in
+  /// \p Other: a counter is zeroed when the matching counter of \p Other
+  /// is non-zero and value profile data is excluded site by site.
+  /// Mismatched counter or value site counts are reported through \p Warn.
+  void exclude(InstrProfRecord &Other,
+               function_ref<void(instrprof_error)> Warn);
+
 private:
   struct ValueProfData {
     std::vector<InstrProfValueSiteRecord> IndirectCallSites;
@@ -850,6 +877,18 @@
   // Scale up value profile data count by N (Numerator) / D (Denominator).
   void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
                           function_ref<void(instrprof_error)> Warn);
+
+  // Reduce the \p ValueKind data to the intersection of \p Src record and
+  // this record. Warns and leaves the data untouched if the two records
+  // disagree on the number of value sites.
+  void intersectValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+                              function_ref<void(instrprof_error)> Warn);
+
+  // Exclude any \p ValueKind data found in \p Src record from this record.
+  // Warns and leaves the data untouched if the two records disagree on the
+  // number of value sites.
+  void excludeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+                            function_ref<void(instrprof_error)> Warn);
 };
 
 struct NamedInstrProfRecord : InstrProfRecord {
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -635,6 +635,47 @@
   }
 }
 
+// Reduce this site record to the intersection of this and \p Input: a target
+// recorded by both keeps the smaller count, and a target that \p Input did
+// not record has its count zeroed. \p Warn is currently unused.
+void InstrProfValueSiteRecord::intersect(
+    InstrProfValueSiteRecord &Input, function_ref<void(instrprof_error)> Warn) {
+  // With both lists sorted by target value a single merge walk is enough.
+  this->sortByTargetValues();
+  Input.sortByTargetValues();
+  auto J = Input.ValueData.begin();
+  auto JE = Input.ValueData.end();
+  for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
+    while (J != JE && J->Value < I->Value)
+      ++J;
+    if (J != JE && J->Value == I->Value)
+      I->Count = std::min(I->Count, J->Count);
+    else
+      I->Count = 0; // Not recorded by Input, so not in the intersection.
+  }
+}
+
+// Exclude from this site record every target that \p Input recorded with a
+// non-zero count, by zeroing its count here. \p Warn is currently unused.
+void InstrProfValueSiteRecord::exclude(
+    InstrProfValueSiteRecord &Input, function_ref<void(instrprof_error)> Warn) {
+  // With both lists sorted by target value a single merge walk is enough.
+  this->sortByTargetValues();
+  Input.sortByTargetValues();
+  auto I = ValueData.begin();
+  auto IE = ValueData.end();
+  for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE;
+       ++J) {
+    while (I != IE && I->Value < J->Value)
+      ++I;
+    if (I != IE && I->Value == J->Value) {
+      if (J->Count > 0)
+        I->Count = 0;
+      ++I;
+    }
+  }
+}
+
 // Merge Value Profile data from Src record to this record for ValueKind.
 // Scale merged value counts by \p Weight.
 void InstrProfRecord::mergeValueProfData(
@@ -697,6 +738,90 @@
     scaleValueProfData(Kind, N, D, Warn);
 }
 
+// Reduce the value profile data of \p ValueKind to the intersection of \p Src
+// record and this record. Warns and leaves the data untouched when the two
+// records disagree on the number of value sites.
+void InstrProfRecord::intersectValueProfData(
+    uint32_t ValueKind, InstrProfRecord &Src,
+    function_ref<void(instrprof_error)> Warn) {
+  uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+  uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
+  if (ThisNumValueSites != OtherNumValueSites) {
+    Warn(instrprof_error::value_site_count_mismatch);
+    return;
+  }
+  if (!ThisNumValueSites)
+    return;
+  std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+      getOrCreateValueSitesForKind(ValueKind);
+  MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
+      Src.getValueSitesForKind(ValueKind);
+  for (uint32_t I = 0; I < ThisNumValueSites; I++)
+    ThisSiteRecords[I].intersect(OtherSiteRecords[I], Warn);
+}
+
+// Reduce this record to the intersection of this one and \p Other: each
+// counter becomes the smaller of the two counts, then every value kind is
+// intersected site by site.
+void InstrProfRecord::intersect(InstrProfRecord &Other,
+                                function_ref<void(instrprof_error)> Warn) {
+  // If the number of counters doesn't match we either have bad data
+  // or a hash collision.
+  if (Counts.size() != Other.Counts.size()) {
+    Warn(instrprof_error::count_mismatch);
+    return;
+  }
+
+  for (size_t I = 0, E = Other.Counts.size(); I < E; ++I)
+    Counts[I] = std::min(Counts[I], Other.Counts[I]);
+
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    intersectValueProfData(Kind, Other, Warn);
+}
+
+// Exclude from this record any value profile data of \p ValueKind found in
+// \p Src record. Warns and leaves the data untouched when the two records
+// disagree on the number of value sites.
+void InstrProfRecord::excludeValueProfData(
+    uint32_t ValueKind, InstrProfRecord &Src,
+    function_ref<void(instrprof_error)> Warn) {
+  uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+  uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
+  if (ThisNumValueSites != OtherNumValueSites) {
+    Warn(instrprof_error::value_site_count_mismatch);
+    return;
+  }
+  if (!ThisNumValueSites)
+    return;
+  std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+      getOrCreateValueSitesForKind(ValueKind);
+  MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
+      Src.getValueSitesForKind(ValueKind);
+  for (uint32_t I = 0; I < ThisNumValueSites; I++)
+    ThisSiteRecords[I].exclude(OtherSiteRecords[I], Warn);
+}
+
+// Reduce this record such that it excludes any profile data found in
+// \p Other: a counter is zeroed when the matching counter of \p Other is
+// non-zero, then every value kind is excluded site by site.
+void InstrProfRecord::exclude(InstrProfRecord &Other,
+                              function_ref<void(instrprof_error)> Warn) {
+  // If the number of counters doesn't match we either have bad data
+  // or a hash collision.
+  if (Counts.size() != Other.Counts.size()) {
+    Warn(instrprof_error::count_mismatch);
+    return;
+  }
+
+  for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
+    if (Other.Counts[I] > 0)
+      Counts[I] = 0;
+  }
+
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    excludeValueProfData(Kind, Other, Warn);
+}
+
 // Map indirect call target name hash to name string.
 uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
                                      InstrProfSymtab *SymTab) {
diff --git a/llvm/test/tools/llvm-profdata/exclude.test b/llvm/test/tools/llvm-profdata/exclude.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/exclude.test
@@ -0,0 +1,30 @@
+# The base profile's counters must be zeroed wherever the test profile has a
+# non-zero count for the same function; everything else is left untouched.
+
+RUN: llvm-profdata merge -o %t_ir_base.profdata %p/Inputs/overlap_1.proftext
+RUN: llvm-profdata merge -o %t_ir_test.profdata %p/Inputs/overlap_2.proftext
+RUN: llvm-profdata exclude -text -o %t_ir_output.proftext %t_ir_base.profdata %t_ir_test.profdata
+RUN: FileCheck --input-file=%t_ir_output.proftext %s
+
+CHECK: :ir
+CHECK: bar
+CHECK: 12884901887
+CHECK: 1
+CHECK: 0
+
+CHECK: bar1
+CHECK: 12884901887
+CHECK: 1
+CHECK: 100000
+
+CHECK: foo
+CHECK: 25571299074
+CHECK: 2
+CHECK: 40000
+CHECK: 60000
+
+CHECK: main
+CHECK: 29212902728
+CHECK: 2
+CHECK: 0
+CHECK: 0
diff --git a/llvm/test/tools/llvm-profdata/intersect.test b/llvm/test/tools/llvm-profdata/intersect.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/intersect.test
@@ -0,0 +1,19 @@
+# Only functions found in both profiles are kept, and each of their counters
+# becomes the smaller of the base and test counts.
+
+RUN: llvm-profdata merge -o %t_ir_base.profdata %p/Inputs/overlap_1.proftext
+RUN: llvm-profdata merge -o %t_ir_test.profdata %p/Inputs/overlap_2.proftext
+RUN: llvm-profdata intersect -text -o %t_ir_output.proftext %t_ir_base.profdata %t_ir_test.profdata
+RUN: FileCheck --input-file=%t_ir_output.proftext %s
+
+CHECK: :ir
+CHECK: bar
+CHECK: 12884901887
+CHECK: 1
+CHECK: 10000
+
+CHECK: main
+CHECK: 29212902728
+CHECK: 2
+CHECK: 20000
+CHECK: 0
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -977,6 +977,146 @@
   return 0;
 }
 
+/// Set operations that can be applied to a pair of instrumentation profiles.
+enum ProfileSetOp { PSO_Intersect, PSO_Exclude };
+
+/// Apply \p Op to every function record of the base profile, using the record
+/// with the same name and hash from the test profile as the other operand, and
+/// write the resulting profile to \p OutputFilename.
+static void applySetOpToInstrProfile(ProfileSetOp Op,
+                                     const std::string &BaseFilename,
+                                     const std::string &TestFilename,
+                                     const std::string &OutputFilename,
+                                     ProfileFormat OutputFormat) {
+  if (OutputFilename.compare("-") == 0)
+    exitWithError("Cannot write indexed profdata format to stdout.");
+
+  if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
+      OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
+    exitWithError("Unknown format is specified.");
+
+  std::mutex ErrorLock;
+  SmallSet<instrprof_error, 4> WriterErrorCodes;
+  WriterContext WC(false, ErrorLock, WriterErrorCodes);
+
+  auto BaseReaderOrErr = InstrProfReader::create(BaseFilename);
+  if (Error E = BaseReaderOrErr.takeError())
+    exitWithError(std::move(E), "Error in reading base profile data");
+  // Test records are looked up by function name and hash below, through the
+  // indexed reader's getInstrProfRecord.
+  auto TestReaderOrErr = IndexedInstrProfReader::create(TestFilename);
+  if (Error E = TestReaderOrErr.takeError())
+    exitWithError(std::move(E), "Error in reading test profile data");
+
+  auto BaseReader = std::move(BaseReaderOrErr.get());
+  auto TestReader = std::move(TestReaderOrErr.get());
+
+  bool IsIRProfile = BaseReader->isIRLevelProfile();
+  bool HasCSIRProfile = BaseReader->hasCSIRLevelProfile();
+  if (WC.Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile))
+    exitWithError("Mixing IR generated profile with Clang generated profile.");
+  WC.Writer.setInstrEntryBBEnabled(BaseReader->instrEntryBBEnabled());
+
+  // Mismatches found by the record operations go through exitWithError.
+  auto Warn = [](instrprof_error E) {
+    exitWithError(toString(make_error<InstrProfError>(E)));
+  };
+
+  for (auto &I : *BaseReader) {
+    auto RecordOrErr = TestReader->getInstrProfRecord(I.Name, I.Hash);
+    if (Error E = RecordOrErr.takeError()) {
+      // Any lookup failure is treated as "not present in the test profile".
+      consumeError(std::move(E));
+      // Such a function cannot be part of an intersection; for an exclusion
+      // there is nothing to remove, so the record is written unchanged.
+      if (Op == PSO_Intersect)
+        continue;
+    } else if (Op == PSO_Intersect) {
+      I.intersect(RecordOrErr.get(), Warn);
+    } else {
+      I.exclude(RecordOrErr.get(), Warn);
+    }
+
+    WC.Writer.addRecord(std::move(I), /*Weight=*/1,
+                        [&](Error E) { exitWithError(std::move(E)); });
+  }
+
+  writeInstrProfile(OutputFilename, OutputFormat, WC.Writer);
+}
+
+/// Write the intersection of the base and test profiles to \p OutputFilename.
+/// Functions that cannot be looked up in the test profile are dropped.
+static void intersectInstrProfile(const std::string &BaseFilename,
+                                  const std::string &TestFilename,
+                                  const std::string &OutputFilename,
+                                  ProfileFormat OutputFormat) {
+  applySetOpToInstrProfile(PSO_Intersect, BaseFilename, TestFilename,
+                           OutputFilename, OutputFormat);
+}
+
+/// Write the base profile to \p OutputFilename after excluding the data that
+/// is also found in the test profile. Functions that cannot be looked up in
+/// the test profile are written unchanged.
+static void excludeInstrProfile(const std::string &BaseFilename,
+                                const std::string &TestFilename,
+                                const std::string &OutputFilename,
+                                ProfileFormat OutputFormat) {
+  applySetOpToInstrProfile(PSO_Exclude, BaseFilename, TestFilename,
+                           OutputFilename, OutputFormat);
+}
+
+/// Signature shared by intersectInstrProfile and excludeInstrProfile.
+using SetOpProfileFn = void (*)(const std::string &, const std::string &,
+                                const std::string &, ProfileFormat);
+
+/// Parse the command line shared by the `intersect` and `exclude` commands and
+/// hand the parsed arguments to \p Run.
+static int setOpMain(int argc, const char *argv[], StringRef Overview,
+                     SetOpProfileFn Run) {
+  cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
+                                    cl::desc("<base profile file>"));
+  cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
+                                    cl::desc("<test profile file>"));
+  cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+                                      cl::init("-"), cl::Required,
+                                      cl::desc("Output file"));
+  cl::alias OutputA("o", cl::desc("Alias for --output"),
+                    cl::aliasopt(OutputFilename));
+  cl::opt<ProfileFormat> OutputFormat(
+      cl::desc("Format of output profile"), cl::init(PF_Binary),
+      cl::values(
+          clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
+          clEnumValN(PF_Compact_Binary, "compbinary",
+                     "Compact binary encoding"),
+          clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"),
+          clEnumValN(PF_Text, "text", "Text encoding"),
+          clEnumValN(PF_GCC, "gcc",
+                     "GCC encoding (only meaningful for -sample)")));
+  cl::ParseCommandLineOptions(argc, argv, Overview);
+
+  Run(BaseFilename, TestFilename, OutputFilename, OutputFormat);
+
+  return 0;
+}
+
+/// Entry point of the `intersect` command.
+static int intersect_main(int argc, const char *argv[]) {
+  return setOpMain(
+      argc, argv,
+      "LLVM profile data tool that performs the following set operation: "
+      "INTERSECT \n",
+      intersectInstrProfile);
+}
+
+/// Entry point of the `exclude` command.
+static int exclude_main(int argc, const char *argv[]) {
+  return setOpMain(
+      argc, argv,
+      "LLVM profile data tool that performs the following set operation: "
+      "NOT IN \n",
+      excludeInstrProfile);
+}
+
 typedef struct ValueSitesStats {
   ValueSitesStats()
       : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0),
@@ -1520,6 +1660,11 @@
       func = show_main;
     else if (strcmp(argv[1], "overlap") == 0)
       func = overlap_main;
+    // Other useful operators for differential coverage analysis
+    else if (strcmp(argv[1], "intersect") == 0)
+      func = intersect_main;
+    else if (strcmp(argv[1], "exclude") == 0)
+      func = exclude_main;
 
     if (func) {
       std::string Invocation(ProgName.str() + " " + argv[1]);