Index: compiler-rt/lib/fuzzer/FuzzerDriver.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -519,7 +519,7 @@ std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath("Merge", ".txt"); Vector NewFiles; - Set NewFeatures, NewCov; + ModuleRelativeValues NewFeatures, NewCov; CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, {}, &NewCov, CFPath, true); for (auto &Path : NewFiles) Index: compiler-rt/lib/fuzzer/FuzzerFork.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFork.h +++ compiler-rt/lib/fuzzer/FuzzerFork.h @@ -10,6 +10,7 @@ #define LLVM_FUZZER_FORK_H #include "FuzzerDefs.h" +#include "FuzzerModuleRelative.h" #include "FuzzerOptions.h" #include "FuzzerRandom.h" @@ -21,10 +22,16 @@ const Vector &Args, const Vector &CorpusDirs, int NumJobs); +// Feature files encode features in a format that organizes them by module. +// Example: +// 0x0000002 # Number of modules +// 0xdeadbeef 0xfeedface 0x00000003 # Hash upper half, lower half, num features +// 0x00000011 0x00000022 0x00000033 # Sorted module-relative features +// ... 
# Next module +bool DecodeFeatures(const Vector &Encoded, + ModuleRelativeValues *Features); void EncodeFeatures(const Vector &Features, Vector *Encoded); -bool DecodeFeatures(const Vector &Encoded, - Vector *Features); } // namespace fuzzer Index: compiler-rt/lib/fuzzer/FuzzerFork.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -92,7 +92,7 @@ std::string TempDir; std::string DFTDir; std::string DataFlowBinary; - Set Features, Cov; + ModuleRelativeValues Features, Cov; Set FilesWithDFT; Vector Files; Random *Rand; @@ -184,6 +184,7 @@ NumRuns += Stats.number_of_executed_units; Vector TempFiles, MergeCandidates; + ModuleRelativeValues NewFeatures, NewCov; // Read all newly created inputs and their feature sets. // Choose only those inputs that have new features. GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); @@ -195,10 +196,8 @@ assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); Vector Encoded(FeatureBytes.size() / sizeof(uint32_t)); memcpy(Encoded.data(), FeatureBytes.data(), FeatureBytes.size()); - Vector NewFeatures; if (DecodeFeatures(Encoded, &NewFeatures)) - if (!std::includes(Features.begin(), Features.end(), - NewFeatures.begin(), NewFeatures.end())) + if (!Features.Includes(NewFeatures)) MergeCandidates.push_back(F); } // if (!FilesToAdd.empty() || Job->ExitCode != 0) @@ -211,7 +210,6 @@ if (MergeCandidates.empty()) return; Vector FilesToAdd; - Set NewFeatures, NewCov; CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, &NewFeatures, Cov, &NewCov, Job->CFPath, false); for (auto &Path : FilesToAdd) { @@ -220,14 +218,14 @@ WriteToFile(U, NewPath); Files.push_back(NewPath); } - Features.insert(NewFeatures.begin(), NewFeatures.end()); - Cov.insert(NewCov.begin(), NewCov.end()); - for (auto Idx : NewCov) - if (auto *TE = TPC.PCTableEntryByIdx(Idx)) + Features.Merge(NewFeatures); + Cov.Merge(NewCov); + 
NewCov.ForEachValue([](uint64_t Hash, uint32_t Offset) { + if (auto *TE = TPC.PCTableEntryByModule(Hash, Offset)) if (TPC.PcIsFuncEntry(TE)) PrintPC(" NEW_FUNC: %p %F %L\n", "", TPC.GetNextInstructionPc(TE->PC)); - + }); } @@ -315,11 +313,11 @@ Env.Files.push_back(File.File); } else { auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - Set NewFeatures, NewCov; + ModuleRelativeValues NewFeatures, NewCov; CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features, &NewFeatures, Env.Cov, &NewCov, CFPath, false); - Env.Features.insert(NewFeatures.begin(), NewFeatures.end()); - Env.Cov.insert(NewFeatures.begin(), NewFeatures.end()); + Env.Features.Merge(NewFeatures); + Env.Cov.Merge(NewCov); RemoveFile(CFPath); } Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, @@ -414,23 +412,107 @@ exit(ExitCode); } +// For the purpose of encoding and decoding features, the encoded vector has the +// following format (in EBNF): +// +// encoded = num_dso, { hash_upper, hash_lower, num_features, { feature } } +// +// where the following fields are uint32_ts with the following meanings: +// num_dsos: The number of modules. +// hash_upper: The upper 32 bits of the module hash. +// hash_lower: The lower 32 bits of the module hash. +// num_features: The number of features for a given module. void EncodeFeatures(const Vector &Features, Vector *Encoded) { assert(Encoded); + + // Convert Features to be module-relative. + ModuleRelativeValues TmpFeatures; + ModuleInfo Info; + Set Tmp; + size_t NumFeatures = 0; + bool First = true; + for (auto Feature : Features) { + // Only look up a new module info when needed. |CollectFeatures| orders the + // features by module, so this should be the minimum number of lookups + // needed. 
+ if (Feature < Info.FirstFeature || Info.LastFeature <= Feature || First) { + TmpFeatures.Merge(Info.Hash, Tmp); + Tmp.clear(); + TPC.ModuleInfoByFeature(Feature, &Info); + NumFeatures = 0; + First = false; + } + // Cap the number of features to what can be expressed by |NumFeatures|. + // Record the features as offsets to the module's first feature so that the + // encoded features are independent of module load order. + if (NumFeatures < std::numeric_limits::max()) + Tmp.insert(Feature - Info.FirstFeature); + }; + TmpFeatures.Merge(Info.Hash, Tmp); + + // According to the format given above, the total length of the encoded vector + // is the number of features, plus 3 for each module (for hash_upper, + // hash_lower, num_features), plus 1 more (for num_modules). + uint32_t NumModules = TmpFeatures.NumModules(); + size_t NumValues = 1 + (NumModules * 3) + TmpFeatures.size(); Encoded->clear(); - Encoded->resize(Features.size()); - std::partial_sort_copy(Features.begin(), Features.end(), Encoded->begin(), - Encoded->end()); + Encoded->reserve(NumValues); + Encoded->push_back(NumModules); + + uint64_t Cur = 0; + First = true; + TmpFeatures.ForEachValue([&](uint64_t Hash, uint32_t Feature) { + if (Hash != Cur || First) { + // Write module metadata: hash upper, hash lower, and num features. + Encoded->push_back(static_cast(Hash >> 32)); + Encoded->push_back(static_cast(Hash)); + Encoded->push_back(TmpFeatures.NumValuesForModule(Hash)); + Cur = Hash; + First = false; + } + // Write each feature. 
+ Encoded->push_back(Feature); + }); } bool DecodeFeatures(const Vector &Encoded, - Vector *Features) { - assert(Features); - if (!std::is_sorted(Encoded.begin(), Encoded.end())) + ModuleRelativeValues *Features) { + if (Encoded.empty()) return false; Features->clear(); - Features->insert(Features->end(), Encoded.begin(), Encoded.end()); - return true; + + auto *I = &Encoded[0]; + size_t Remaining = Encoded.size(); + + uint32_t RemainingModules = *I++; + --Remaining; + + // Read the number of expected modules. + for (; RemainingModules; --RemainingModules) { + if (Remaining < 3) + return false; + + // Read module metadata: hash upper, hash lower, and num features. + uint64_t Hash = *I++; + Hash <<= 32; + Hash |= *I++; + auto NumFeatures = *I++; + Remaining -= 3; + if (Remaining < NumFeatures) + return false; + + // Read each feature. `EncodeFeatures` required that these were sorted, + // so they should still be. + if (!std::is_sorted(I, I + NumFeatures)) + return false; + + Set Values(I, I + NumFeatures); + Features->Merge(Hash, Values); + I += NumFeatures; + Remaining -= NumFeatures; + } + return Remaining == 0; } } // namespace fuzzer Index: compiler-rt/lib/fuzzer/FuzzerMerge.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerMerge.h +++ compiler-rt/lib/fuzzer/FuzzerMerge.h @@ -41,6 +41,8 @@ #define LLVM_FUZZER_MERGE_H #include "FuzzerDefs.h" +#include "FuzzerIO.h" +#include "FuzzerModuleRelative.h" #include #include @@ -52,7 +54,7 @@ struct MergeFileInfo { std::string Name; size_t Size = 0; - Vector Features, Cov; + ModuleRelativeValues Features, Cov; }; struct Merger { @@ -64,23 +66,19 @@ bool Parse(std::istream &IS, bool ParseCoverage); bool Parse(const std::string &Str, bool ParseCoverage); void ParseOrExit(std::istream &IS, bool ParseCoverage); - size_t Merge(const Set &InitialFeatures, Set *NewFeatures, - const Set &InitialCov, Set *NewCov, - Vector *NewFiles); + size_t Merge(const ModuleRelativeValues 
&InitialFeatures, + ModuleRelativeValues *NewFeatures, + const ModuleRelativeValues &InitialCov, + ModuleRelativeValues *NewCov, Vector *NewFiles); size_t ApproximateMemoryConsumption() const; - Set AllFeatures() const; }; -void CrashResistantMerge(const Vector &Args, - const Vector &OldCorpus, - const Vector &NewCorpus, - Vector *NewFiles, - const Set &InitialFeatures, - Set *NewFeatures, - const Set &InitialCov, - Set *NewCov, - const std::string &CFPath, - bool Verbose); +void CrashResistantMerge( + const Vector &Args, const Vector &OldCorpus, + const Vector &NewCorpus, Vector *NewFiles, + const ModuleRelativeValues &InitialFeatures, + ModuleRelativeValues *NewFeatures, const ModuleRelativeValues &InitialCov, + ModuleRelativeValues *NewCov, const std::string &CFPath, bool Verbose); } // namespace fuzzer Index: compiler-rt/lib/fuzzer/FuzzerMerge.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerMerge.cpp +++ compiler-rt/lib/fuzzer/FuzzerMerge.cpp @@ -50,6 +50,7 @@ // FT 2 8 9 // COV 2 11 12 bool Merger::Parse(std::istream &IS, bool ParseCoverage) { + Files.clear(); LastFailure.clear(); std::string Line; @@ -77,8 +78,10 @@ size_t ExpectedStartMarker = 0; const size_t kInvalidStartMarker = -1; size_t LastSeenStartMarker = kInvalidStartMarker; - Vector TmpFeatures; - Set PCs; + bool HasFeatures = true; + uint64_t Hash = 0; + Set Tmp, NewCov; + ModuleRelativeValues AllCov; while (std::getline(IS, Line, '\n')) { std::istringstream ISS1(Line); std::string Marker; @@ -93,30 +96,42 @@ LastSeenStartMarker = ExpectedStartMarker; assert(ExpectedStartMarker < Files.size()); ExpectedStartMarker++; + HasFeatures = false; } else if (Marker == "FT") { - // FT FILE_ID COV1 COV2 COV3 ... + // FT FILE_ID HASH FT1 FT2 FT3 ... 
size_t CurrentFileIdx = N; if (CurrentFileIdx != LastSeenStartMarker) return false; - LastSeenStartMarker = kInvalidStartMarker; + HasFeatures = true; + ISS1 >> std::hex >> Hash >> std::dec; + if (!ISS1) + return false; if (ParseCoverage) { - TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. + Tmp.clear(); // use a vector from outer scope to avoid resizes. while (ISS1 >> N) - TmpFeatures.push_back(N); - std::sort(TmpFeatures.begin(), TmpFeatures.end()); - Files[CurrentFileIdx].Features = TmpFeatures; + Tmp.insert(N); + Files[CurrentFileIdx].Features.Merge(Hash, Tmp); } } else if (Marker == "COV") { + // COV FILE_ID HASH COV1 COV2 COV3 ... size_t CurrentFileIdx = N; - if (ParseCoverage) + if (CurrentFileIdx != LastSeenStartMarker) + return false; + ISS1 >> std::hex >> Hash >> std::dec; + if (!ISS1) + return false; + if (ParseCoverage) { + Tmp.clear(); while (ISS1 >> N) - if (PCs.insert(N).second) - Files[CurrentFileIdx].Cov.push_back(N); + Tmp.insert(N); + AllCov.Merge(Hash, Tmp, &NewCov); + Files[CurrentFileIdx].Cov.Merge(Hash, NewCov); + } } else { return false; } } - if (LastSeenStartMarker != kInvalidStartMarker) + if (!HasFeatures) LastFailure = Files[LastSeenStartMarker].Name; FirstNotProcessedFile = ExpectedStartMarker; @@ -125,36 +140,38 @@ size_t Merger::ApproximateMemoryConsumption() const { size_t Res = 0; - for (const auto &F: Files) - Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); + for (const auto &F : Files) { + Res += sizeof(F); + Res += F.Features.NumModules() * sizeof(uint64_t); + Res += F.Features.NumValues() * sizeof(uint32_t); + } return Res; } // Decides which files need to be merged (add those to NewFiles). // Returns the number of new features added. 
-size_t Merger::Merge(const Set &InitialFeatures, - Set *NewFeatures, - const Set &InitialCov, Set *NewCov, +size_t Merger::Merge(const ModuleRelativeValues &InitialFeatures, + ModuleRelativeValues *NewFeatures, + const ModuleRelativeValues &InitialCov, + ModuleRelativeValues *NewCov, Vector *NewFiles) { NewFiles->clear(); NewFeatures->clear(); NewCov->clear(); assert(NumFilesInFirstCorpus <= Files.size()); - Set AllFeatures = InitialFeatures; + ModuleRelativeValues AllFeatures, AllCov; + AllFeatures.Merge(InitialFeatures); + AllCov.Merge(InitialCov); // What features are in the initial corpus? for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); + auto &File = Files[i]; + AllFeatures.Merge(File.Features); + AllCov.Merge(File.Cov); } // Remove all features that we already know from all other inputs. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - Vector Tmp; - std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), - AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); - Cur.swap(Tmp); - } + for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) + Files[i].Features.Deduplicate(AllFeatures); // Sort. Give preference to // * smaller files @@ -169,32 +186,14 @@ // One greedy pass: add the file's features to AllFeatures. // If new features were added, add this file to NewFiles. 
for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), - // Files[i].Size, Cur.size()); - bool FoundNewFeatures = false; - for (auto Fe: Cur) { - if (AllFeatures.insert(Fe).second) { - FoundNewFeatures = true; - NewFeatures->insert(Fe); - } - } - if (FoundNewFeatures) - NewFiles->push_back(Files[i].Name); - for (auto Cov : Files[i].Cov) - if (InitialCov.find(Cov) == InitialCov.end()) - NewCov->insert(Cov); + auto &File = Files[i]; + if (AllFeatures.Merge(File.Features, NewFeatures)) + NewFiles->push_back(File.Name); + AllCov.Merge(File.Cov, NewCov); } return NewFeatures->size(); } -Set Merger::AllFeatures() const { - Set S; - for (auto &File : Files) - S.insert(File.Features.begin(), File.Features.end()); - return S; -} - // Inner process. May crash if the target crashes. void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); @@ -212,11 +211,11 @@ M.Files.size() - M.FirstNotProcessedFile); std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); - Set AllFeatures; + ModuleRelativeValues AllFeatures; auto PrintStatsWrapper = [this, &AllFeatures](const char* Where) { this->PrintStats(Where, "\n", 0, AllFeatures.size()); }; - Set AllPCs; + ModuleRelativeValues AllCov; for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { Fuzzer::MaybeExitGracefully(); auto U = FileToVector(M.Files[i].Name); @@ -236,28 +235,61 @@ // * Then, all other files, smallest first. // So it makes no sense to record all features for all files, instead we // only record features that were not seen before. 
- Set UniqFeatures; + ModuleInfo Info; + Set Tmp; + ModuleRelativeValues Features, UniqFeatures; TPC.CollectFeatures([&](size_t Feature) { - if (AllFeatures.insert(Feature).second) - UniqFeatures.insert(Feature); + if (Feature < Info.FirstFeature || Info.LastFeature <= Feature) { + Features.Merge(Info.Hash, Tmp); + TPC.ModuleInfoByFeature(Feature, &Info); + } + Tmp.insert(Feature - Info.FirstFeature); }); + Features.Merge(Info.Hash, Tmp); + AllFeatures.Merge(Features, &UniqFeatures); + + Tmp.clear(); + ModuleRelativeValues Cov, UniqCov; TPC.UpdateObservedPCs(); + TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { + uintptr_t Idx = TPC.PCTableEntryIdx(TE); + if (Idx < Info.FirstIdx || Info.LastIdx <= Idx) { + Cov.Merge(Info.Hash, Tmp); + TPC.ModuleInfoByIdx(Idx, &Info); + } + Tmp.insert(Idx - Info.FirstIdx); + }); + Cov.Merge(Info.Hash, Tmp); + AllCov.Merge(Cov, &UniqCov); + // Show stats. if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) PrintStatsWrapper("pulse "); if (TotalNumberOfRuns == M.NumFilesInFirstCorpus) PrintStatsWrapper("LOADED"); + // Write the post-run marker and the coverage. 
- OF << "FT " << i; - for (size_t F : UniqFeatures) - OF << " " << F; - OF << "\n"; - OF << "COV " << i; - TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { - if (AllPCs.insert(TE).second) - OF << " " << TPC.PCTableEntryIdx(TE); - }); - OF << "\n"; + uintptr_t FirstIdx = 0; + while (true) { + TPC.ModuleInfoByIdx(FirstIdx, &Info); + if (!Info.Hash) + break; + auto *UF = UniqFeatures.get(Info.Hash); + if (UF) { + OF << "FT " << i << " 0x" << std::hex << Info.Hash << std::dec; + for (uint32_t Feature : *UF) + OF << " " << Feature; + OF << "\n"; + } + auto *UC = UniqCov.get(Info.Hash); + if (UC) { + OF << "COV " << i << " 0x" << std::hex << Info.Hash << std::dec; + for (uint32_t Idx : *UC) + OF << " " << Idx; + OF << "\n"; + } + FirstIdx = Info.LastIdx; + } OF.flush(); } PrintStatsWrapper("DONE "); @@ -303,12 +335,11 @@ const Vector &OldCorpus, const Vector &NewCorpus, Vector *NewFiles, - const Set &InitialFeatures, - Set *NewFeatures, - const Set &InitialCov, - Set *NewCov, - const std::string &CFPath, - bool V /*Verbose*/) { + const ModuleRelativeValues &InitialFeatures, + ModuleRelativeValues *NewFeatures, + const ModuleRelativeValues &InitialCov, + ModuleRelativeValues *NewCov, + const std::string &CFPath, bool V /*Verbose*/) { if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge. 
size_t NumAttempts = 0; Vector KnownFiles; @@ -377,7 +408,7 @@ } auto ExitCode = ExecuteCommand(Cmd); if (!ExitCode) { - VPrintf(V, "MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt); + VPrintf(V, "MERGE-OUTER: successful in %zd attempt(s)\n", Attempt); break; } } Index: compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp =================================================================== --- compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -948,34 +948,81 @@ template void CheckEncodedFeatures(FakeModule &M, bool UseCounters) { - Vector Features, Expected, Actual; - M.CollectFeatures(UseCounters, &Expected); + Vector Features; + M.CollectFeatures(UseCounters, &Features); + ASSERT_GT(Features.size(), 0U); + + ModuleInfo Info; + TPC.ModuleInfoByFeature(Features[0], &Info); + + // Generate the sequence of relative encoded features for the given module. + Vector Expected; + Expected.reserve(Features.size() + 3); + Expected.push_back(static_cast(Info.Hash >> 32)); + Expected.push_back(static_cast(Info.Hash)); + Expected.push_back(static_cast(Features.size())); + for (auto F : Features) + Expected.push_back(F - Info.FirstFeature); + + // Now get the actual encoded features, and look for the expected ones. + bool Found = false; + Vector Actual; TPC.CollectFeatures( [&](size_t F) { Features.push_back(static_cast(F)); }); EncodeFeatures(Features, &Actual); - auto I = std::search(Actual.begin(), Actual.end(), Expected.begin(), - Expected.end()); - EXPECT_NE(I, Actual.end()); + + // Number of modules. + auto U32 = Actual.begin(); + ASSERT_NE(U32, Actual.end()); + auto NumModules = *U32++; + uint32_t NumObservedModules = 0; + + // Loop through all the modules and validate their headers. + while (U32 != Actual.end()) { + NumObservedModules++; + auto Mark = U32; + + // First two values make up the module hash. 
+ ASSERT_NE(U32, Actual.end()); + auto Hash = static_cast(*U32++) << 32; + ASSERT_NE(U32, Actual.end()); + Hash |= *U32++; + + // Next value is the number of features for the module. + ASSERT_NE(U32, Actual.end()); + auto NumFeatures = *U32++; + + // Next are the features themselves. + ASSERT_GE(Actual.end() - U32, NumFeatures); + U32 += NumFeatures; + + // Compare this module's values with the expected values. + if (!Found && Hash == Info.Hash && 3 + NumFeatures == Expected.size()) + Found = std::equal(Mark, U32, Expected.begin(), Expected.end()); + } + EXPECT_EQ(NumObservedModules, NumModules); + EXPECT_TRUE(Found); } TEST(FuzzerFork, EncodeFeatures) { - TPC.SetUseCounters(0); - TPC.SetUseValueProfileMask(0); - TPC.RecordInitialStack(); - TPC.ResetMaps(); - // Add a module. It will be added to TPC, so make it static to allow the test // to be run multiple times. + TPC.SetUseCounters(0); static FakeModule<4> M1; + M1.AddToTPC(); M1.Counters[0] = 3; M1.Counters[1] = 2; M1.Counters[2] = 1; M1.Counters[3] = 0; // Not encoded. - CheckEncodedFeatures(M1, /* UseCounters */ false); + { + SCOPED_TRACE("M1"); + EXPECT_NO_FATAL_FAILURE(CheckEncodedFeatures(M1, /*UseCounters*/ false)); + } // Add a second module. This time, use counters. TPC.SetUseCounters(1); static FakeModule<9> M2; + M2.AddToTPC(); M2.Counters[0] = 0; // Not encoded. M2.Counters[1] = 1; M2.Counters[2] = 2; @@ -985,27 +1032,71 @@ M2.Counters[6] = 16; M2.Counters[7] = 32; M2.Counters[8] = 128; - CheckEncodedFeatures(M2, /* UseCounters */ true); + { + SCOPED_TRACE("M2"); + EXPECT_NO_FATAL_FAILURE(CheckEncodedFeatures(M2, /*UseCounters*/ true)); + } + + // The extra counters, value profile bits, and stack depth emit features that + // do not correspond to a module. These are associated with a hash of zero and + // always appear first. 
+ TPC.SetUseValueProfileMask(1); + TPC.AddValueForMemcmp(this, M1.Counters, M2.Counters, + std::min(M1.NumPCs, M2.NumPCs), + /* StopAtZero */ false); + Vector Features, Encoded; + TPC.CollectFeatures( + [&](size_t F) { Features.push_back(static_cast(F)); }); + EncodeFeatures(Features, &Encoded); + + ASSERT_GE(Encoded.size(), 4U); + EXPECT_EQ(Encoded[1], 0U); + EXPECT_EQ(Encoded[2], 0U); + EXPECT_GE(Encoded[3], 1U); } TEST(FuzzerFork, DecodeFeatures) { - Vector A; + ModuleRelativeValues A; // Empty input. - EXPECT_TRUE(DecodeFeatures({}, &A)); - TRACED_EQ(A, {}); + EXPECT_FALSE(DecodeFeatures({}, &A)); + + // No modules. + EXPECT_TRUE(DecodeFeatures({0}, &A)); + EXPECT_EQ(A.size(), 0U); - // Not sorted. - EXPECT_FALSE(DecodeFeatures({3, 2}, &A)); - TRACED_EQ(A, {}); + // Incomplete modules header. + EXPECT_FALSE(DecodeFeatures({1}, &A)); + EXPECT_FALSE(DecodeFeatures({1, 0xde}, &A)); + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad}, &A)); - // Valid. - EXPECT_TRUE(DecodeFeatures({2, 3}, &A)); - TRACED_EQ(A, {2, 3}); + // Module length mismatch. + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad, 1}, &A)); + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad, 2, 2}, &A)); + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad, 1, 1, 2}, &A)); + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad, 2, 2, 3, 4}, &A)); + + // Module not sorted. + EXPECT_FALSE(DecodeFeatures({1, 0xde, 0xad, 2, 3, 2}, &A)); + + // One valid module. + EXPECT_TRUE(DecodeFeatures({1, 0xde, 0xad, 2, 2, 3}, &A)); + EXPECT_EQ(A.size(), 2U); + TRACED_EQ(A.get(0xde000000adULL), {2, 3}); + + // Missing module. + EXPECT_FALSE(DecodeFeatures({2, 0xde, 0xad, 2, 2, 3}, &A)); + + // Extra module. + EXPECT_FALSE( + DecodeFeatures({1, 0xde, 0xad, 2, 2, 3, 0xbe, 0xef, 3, 4, 5, 6}, &A)); // Multiple valid modules. 
- EXPECT_TRUE(DecodeFeatures({2, 3, 4, 5, 6}, &A)); - TRACED_EQ(A, {2, 3, 4, 5, 6}); + EXPECT_TRUE( + DecodeFeatures({2, 0xde, 0xad, 2, 2, 3, 0xbe, 0xef, 3, 4, 5, 6}, &A)); + EXPECT_EQ(A.size(), 5U); + TRACED_EQ(A.get(0xde000000adULL), {2, 3}); + TRACED_EQ(A.get(0xbe000000efULL), {4, 5, 6}); } TEST(Merger, Parse) { @@ -1014,19 +1105,27 @@ const char *kInvalidInputs[] = { // Bad file numbers. "", - "x", + "BAD", "0\n0", - "3\nx", + "3\nBAD", "2\n3", "2\n2", // Bad file names. "2\n2\nA\n", "2\n2\nA\nB\nC\n", - // Unknown markers. + // Unexpected markers + "1\n1\nA\nFT 0", + "1\n1\nA\nCOV 0", + // Unknown markers + "1\n1\nA\nBAD 0", "2\n1\nA\nSTARTED 0\nBAD 0 0x0", - // Bad file IDs. + "2\n1\nA\nSTARTED 0\nFT 0 BAD", + // Bad file IDs "1\n1\nA\nSTARTED 1", "2\n1\nA\nSTARTED 0\nFT 1 0x0", + // Missing Hash + "2\n1\nA\nSTARTED 0\nFT 1\n", + "2\n1\nA\nSTARTED 0\nCOV 1\n", }; for (auto S : kInvalidInputs) { SCOPED_TRACE(S); @@ -1053,9 +1152,9 @@ // Parse control file that failed on later attempt EXPECT_TRUE(M.Parse("3\n1\nAA\nBB\nC\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1002\n" "", true)); @@ -1069,17 +1168,17 @@ EXPECT_EQ(M.Files[2].Size, 1002U); EXPECT_EQ(M.LastFailure, "C"); EXPECT_EQ(M.FirstNotProcessedFile, 3U); - TRACED_EQ(M.Files[0].Features, {1, 2, 3}); - TRACED_EQ(M.Files[1].Features, {4, 5, 6}); + TRACED_EQ(M.Files[0].Features.get(0xbeef), {1, 2, 3}); + TRACED_EQ(M.Files[1].Features.get(0xbeef), {4, 5, 6}); // Parse control file without features or PCs EXPECT_TRUE(M.Parse("2\n0\nAA\nBB\n" "STARTED 0 1000\n" - "FT 0\n" - "COV 0\n" + "FT 0 0xbeef\n" + "COV 0 0xbeef\n" "STARTED 1 1001\n" - "FT 1\n" - "COV 1\n" + "FT 1 0xcafe\n" + "COV 1 0xcafe\n" "", true)); ASSERT_EQ(M.Files.size(), 2U); @@ -1094,143 +1193,147 @@ // Parse features and PCs EXPECT_TRUE(M.Parse("3\n2\nAA\nBB\nC\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" - "COV 0 11 12 13\n" + "FT 0 0xbeef 1 2 3\n" + "COV 0 
0xbeef 11 12 13\n" "STARTED 1 1001\n" - "FT 1 4 5 6\n" - "COV 1 7 8 9\n" + "FT 1 0xbeef 4 5 6\n" + "COV 1 0xbeef 7 8 9\n" + "FT 1 0xcafe 14 15 16\n" + "COV 1 0xcafe 17 18 19\n" "STARTED 2 1002\n" - "FT 2 6 1 3\n" - "COV 2 16 11 13\n" + "FT 2 0xcafe 6 1 3\n" + "COV 2 0xcafe 16 11 13\n" "", true)); ASSERT_EQ(M.Files.size(), 3U); EXPECT_EQ(M.NumFilesInFirstCorpus, 2U); EXPECT_TRUE(M.LastFailure.empty()); EXPECT_EQ(M.FirstNotProcessedFile, 3U); - TRACED_EQ(M.Files[0].Features, {1, 2, 3}); - TRACED_EQ(M.Files[0].Cov, {11, 12, 13}); - TRACED_EQ(M.Files[1].Features, {4, 5, 6}); - TRACED_EQ(M.Files[1].Cov, {7, 8, 9}); - TRACED_EQ(M.Files[2].Features, {1, 3, 6}); - TRACED_EQ(M.Files[2].Cov, {16}); + TRACED_EQ(M.Files[0].Features.get(0xbeef), {1, 2, 3}); + TRACED_EQ(M.Files[0].Cov.get(0xbeef), {11, 12, 13}); + TRACED_EQ(M.Files[1].Features.get(0xbeef), {4, 5, 6}); + TRACED_EQ(M.Files[1].Features.get(0xcafe), {14, 15, 16}); + TRACED_EQ(M.Files[1].Cov.get(0xbeef), {7, 8, 9}); + TRACED_EQ(M.Files[1].Cov.get(0xcafe), {17, 18, 19}); + TRACED_EQ(M.Files[2].Features.get(0xcafe), {1, 3, 6}); + TRACED_EQ(M.Files[2].Cov.get(0xcafe), {11, 13, 16}); } TEST(Merger, Merge) { Merger M; - Set Features, NewFeatures; - Set Cov, NewCov; + ModuleRelativeValues Features, NewFeatures; + ModuleRelativeValues Cov, NewCov; Vector NewFiles; // Adds new files and features EXPECT_TRUE(M.Parse("3\n0\nA\nB\nC\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1002\n" - "FT 2 6 1 3\n" + "FT 2 0xbeef 6 1 3\n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 6U); TRACED_EQ(M.Files, {"A", "B", "C"}); TRACED_EQ(NewFiles, {"A", "B"}); - TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5, 6}); + TRACED_EQ(NewFeatures.get(0xbeef), {1, 2, 3, 4, 5, 6}); // Doesn't return features or files in the initial corpus. 
EXPECT_TRUE(M.Parse("3\n1\nA\nB\nC\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1002\n" - "FT 2 6 1 3\n" + "FT 2 0xbeef 6 1 3\n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 3U); TRACED_EQ(M.Files, {"A", "B", "C"}); TRACED_EQ(NewFiles, {"B"}); - TRACED_EQ(NewFeatures, {4, 5, 6}); + TRACED_EQ(NewFeatures.get(0xbeef), {4, 5, 6}); // No new features, so no new files EXPECT_TRUE(M.Parse("3\n2\nA\nB\nC\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1002\n" - "FT 2 6 1 3\n" + "FT 2 0xbeef 6 1 3\n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 0U); TRACED_EQ(M.Files, {"A", "B", "C"}); TRACED_EQ(NewFiles, {}); - TRACED_EQ(NewFeatures, {}); + EXPECT_EQ(NewFeatures.get(0xbeef), nullptr); // Can pass initial features and coverage. - Features = {1, 2, 3}; + Features.Merge(0xbeef, {1, 2, 3}); Cov = {}; EXPECT_TRUE(M.Parse("2\n0\nA\nB\n" "STARTED 0 1000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6\n" + "FT 1 0xbeef 4 5 6\n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 3U); TRACED_EQ(M.Files, {"A", "B"}); TRACED_EQ(NewFiles, {"B"}); - TRACED_EQ(NewFeatures, {4, 5, 6}); + TRACED_EQ(NewFeatures.get(0xbeef), {4, 5, 6}); Features.clear(); Cov.clear(); // Parse smaller files first EXPECT_TRUE(M.Parse("3\n0\nA\nB\nC\n" "STARTED 0 2000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1001\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1002\n" - "FT 2 6 1 3 \n" + "FT 2 0xbeef 6 1 3 \n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 6U); TRACED_EQ(M.Files, {"B", "C", "A"}); TRACED_EQ(NewFiles, {"B", "C", "A"}); - TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5, 6}); + TRACED_EQ(NewFeatures.get(0xbeef), {1, 2, 3, 4, 5, 6}); 
EXPECT_TRUE(M.Parse("4\n0\nA\nB\nC\nD\n" "STARTED 0 2000\n" - "FT 0 1 2 3\n" + "FT 0 0xbeef 1 2 3\n" "STARTED 1 1101\n" - "FT 1 4 5 6 \n" + "FT 1 0xbeef 4 5 6 \n" "STARTED 2 1102\n" - "FT 2 6 1 3 100 \n" + "FT 2 0xbeef 6 1 3 100 \n" "STARTED 3 1000\n" - "FT 3 1 \n" + "FT 3 0xbeef 1 \n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 7U); TRACED_EQ(M.Files, {"A", "B", "C", "D"}); TRACED_EQ(NewFiles, {"D", "B", "C", "A"}); - TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5, 6, 100}); + TRACED_EQ(NewFeatures.get(0xbeef), {1, 2, 3, 4, 5, 6, 100}); // For same sized file, parse more features first EXPECT_TRUE(M.Parse("4\n1\nA\nB\nC\nD\n" "STARTED 0 2000\n" - "FT 0 4 5 6 7 8\n" + "FT 0 0xbeef 4 5 6 7 8\n" "STARTED 1 1100\n" - "FT 1 1 2 3 \n" + "FT 1 0xbeef 1 2 3 \n" "STARTED 2 1100\n" - "FT 2 2 3 \n" + "FT 2 0xbeef 2 3 \n" "STARTED 3 1000\n" - "FT 3 1 \n" + "FT 3 0xbeef 1 \n" "", true)); EXPECT_EQ(M.Merge(Features, &NewFeatures, Cov, &NewCov, &NewFiles), 3U); TRACED_EQ(M.Files, {"A", "B", "C", "D"}); TRACED_EQ(NewFiles, {"D", "B"}); - TRACED_EQ(NewFeatures, {1, 2, 3}); + TRACED_EQ(NewFeatures.get(0xbeef), {1, 2, 3}); } #undef TRACED_EQ Index: compiler-rt/test/fuzzer/merge-control-file.test =================================================================== --- compiler-rt/test/fuzzer/merge-control-file.test +++ compiler-rt/test/fuzzer/merge-control-file.test @@ -32,9 +32,9 @@ RUN: rm -f %t/T1/*; cp %t/T0/* %t/T1 RUN: echo 3 > %t/MCF; echo 0 >> %t/MCF; echo %t/T1/1 >> %t/MCF; echo %t/T1/2 >> %t/MCF; echo %t/T1/3 >> %t/MCF RUN: echo STARTED 0 1 >> %t/MCF -RUN: echo FT 0 11 >> %t/MCF +RUN: echo FT 0 0xdeadbeef 11 >> %t/MCF RUN: echo STARTED 1 2 >> %t/MCF -RUN: echo FT 1 12 >> %t/MCF +RUN: echo FT 1 0xcafef00d 12 >> %t/MCF RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=OK_2 OK_2: MERGE-OUTER: control file ok, 3 files total, first not processed file 2 OK_2: MERGE-OUTER: 3 new files with {{.*}} new 
features added @@ -42,10 +42,10 @@ RUN: rm -f %t/T1/*; cp %t/T0/* %t/T1 RUN: echo 3 > %t/MCF; echo 0 >> %t/MCF; echo %t/T1/1 >> %t/MCF; echo %t/T1/2 >> %t/MCF; echo %t/T1/3 >> %t/MCF RUN: echo STARTED 0 1 >> %t/MCF -RUN: echo FT 0 11 >> %t/MCF +RUN: echo FT 0 0xdeadbeef 11 >> %t/MCF RUN: echo STARTED 1 2 >> %t/MCF -RUN: echo FT 1 12 >> %t/MCF +RUN: echo FT 1 0xcafef00d 12 >> %t/MCF RUN: echo STARTED 2 2 >> %t/MCF -RUN: echo FT 2 13 >> %t/MCF +RUN: echo FT 2 0x12345678 13 >> %t/MCF RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=OK_3 OK_3: MERGE-OUTER: nothing to do, merge has been completed before Index: compiler-rt/test/fuzzer/merge.test =================================================================== --- compiler-rt/test/fuzzer/merge.test +++ compiler-rt/test/fuzzer/merge.test @@ -52,7 +52,7 @@ RUN: cp %t/T0/* %t/T1/ RUN: echo 'FUZZER' > %t/T2/FUZZER RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=MERGE_WITH_CRASH -MERGE_WITH_CRASH: MERGE-OUTER: succesfull in 2 attempt(s) +MERGE_WITH_CRASH: MERGE-OUTER: successful in 2 attempt(s) MERGE_WITH_CRASH: MERGE-OUTER: 3 new files # Check that we actually limit the size with max_len @@ -61,5 +61,5 @@ RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5 RUN: not grep FUZZER %t/T1/* RUN: grep FUZZE %t/T1/* -MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s) +MERGE_LEN5: MERGE-OUTER: successful in 1 attempt(s)