Index: lib/fuzzer/FuzzerDefs.h =================================================================== --- lib/fuzzer/FuzzerDefs.h +++ lib/fuzzer/FuzzerDefs.h @@ -15,10 +15,13 @@ #include #include #include +#include +#include +#include #include #include -#include -#include +#include + // Platform detection. #ifdef __linux__ @@ -191,6 +194,11 @@ template using Set = std::set, fuzzer_allocator>; +template +using Map = + std::map, fuzzer_allocator>>; + +typedef std::pair FilePair; typedef Vector Unit; typedef Vector UnitVector; typedef int (*UserCallback)(const uint8_t *Data, size_t Size); Index: lib/fuzzer/FuzzerDriver.cpp =================================================================== --- lib/fuzzer/FuzzerDriver.cpp +++ lib/fuzzer/FuzzerDriver.cpp @@ -489,11 +489,16 @@ std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath(".txt"); Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; - CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, - {}, &NewCov, CFPath, true); + CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, &ReplacedFiles, {}, + &NewFeatures, {}, &NewCov, CFPath, true); for (auto &Path : NewFiles) F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen)); + for (auto &Pair : ReplacedFiles) { + F->WriteToOutputCorpus(FileToVector(Pair.second, Options.MaxLen)); + RemoveFile(Pair.first); + } // We are done, delete the control file if it was a temporary one. if (!Flags.merge_control_file) RemoveFile(CFPath); Index: lib/fuzzer/FuzzerFork.cpp =================================================================== --- lib/fuzzer/FuzzerFork.cpp +++ lib/fuzzer/FuzzerFork.cpp @@ -210,15 +210,18 @@ if (MergeCandidates.empty()) return; Vector FilesToAdd; + Vector FilesToReplace; Set NewFeatures, NewCov; - CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, - &NewFeatures, Cov, &NewCov, Job->CFPath, false); + CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, &FilesToReplace, + Features, &NewFeatures, Cov, &NewCov, Job->CFPath, + false); for (auto &Path : FilesToAdd) { auto U = FileToVector(Path); auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); Files.push_back(NewPath); } + // TODO(Dor1s): go through FilesToReplace here. Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); for (auto Idx : NewCov) @@ -226,10 +229,8 @@ if (TPC.PcIsFuncEntry(TE)) PrintPC(" NEW_FUNC: %p %F %L\n", "", TPC.GetNextInstructionPc(TE->PC)); - } - void CollectDFT(const std::string &InputPath) { if (DataFlowBinary.empty()) return; if (!FilesWithDFT.insert(InputPath).second) return; @@ -310,13 +311,13 @@ Env.MainCorpusDir = CorpusDirs[0]; auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, - {}, &Env.Cov, - CFPath, false); + Vector FilesToReplace; + CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, &FilesToReplace, {}, + &Env.Features, {}, &Env.Cov, CFPath, false); RemoveFile(CFPath); Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); - + // TODO(Dor1s): think if anything has to be done with FilesToReplace. int ExitCode = 0; JobQueue FuzzQ, MergeQ; Index: lib/fuzzer/FuzzerMerge.h =================================================================== --- lib/fuzzer/FuzzerMerge.h +++ lib/fuzzer/FuzzerMerge.h @@ -52,6 +52,7 @@ std::string Name; size_t Size = 0; Vector Features, Cov; + std::string Signature; }; struct Merger { @@ -65,7 +66,8 @@ void ParseOrExit(std::istream &IS, bool ParseCoverage); size_t Merge(const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Set *NewCov, - Vector *NewFiles); + Vector *NewFiles, + Vector *ReplacedFiles); size_t ApproximateMemoryConsumption() const; Set AllFeatures() const; }; @@ -74,6 +76,7 @@ const Vector &OldCorpus, const Vector &NewCorpus, Vector *NewFiles, + Vector *ReplacedFiles, const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Index: lib/fuzzer/FuzzerMerge.cpp =================================================================== --- lib/fuzzer/FuzzerMerge.cpp +++ lib/fuzzer/FuzzerMerge.cpp @@ -82,6 +82,7 @@ std::istringstream ISS1(Line); std::string Marker; size_t N; + std::string Signature; ISS1 >> Marker; ISS1 >> N; if (Marker == "STARTED") { @@ -111,6 +112,9 @@ while (ISS1 >> N) if (PCs.insert(N).second) Files[CurrentFileIdx].Cov.push_back(N); + } else if (Marker == "SIGNATURE") { + ISS1 >> Signature; + Files[N].Signature = Signature; } else { return false; } @@ -134,15 +138,18 @@ size_t Merger::Merge(const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Set *NewCov, - Vector *NewFiles) { + Vector *NewFiles, + Vector *ReplacedFiles) { NewFiles->clear(); assert(NumFilesInFirstCorpus <= Files.size()); Set AllFeatures = InitialFeatures; + Map> SignaturesMap; // What features are in the initial corpus? for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); + auto &Cur = Files[i]; + AllFeatures.insert(Cur.Features.begin(), Cur.Features.end()); + SignaturesMap[Cur.Signature] = std::make_pair(Cur.Size, Cur.Name); } // Remove all features that we already know from all other inputs. for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { @@ -165,6 +172,8 @@ // One greedy pass: add the file's features to AllFeatures. // If new features were added, add this file to NewFiles. + // Check files' signatures and sizes, replace equivalent files with reduced + // variants. for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { auto &Cur = Files[i].Features; // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), @@ -181,6 +190,16 @@ for (auto Cov : Files[i].Cov) if (InitialCov.find(Cov) == InitialCov.end()) NewCov->insert(Cov); + + auto Signature = SignaturesMap.find(Files[i].Signature); + if (Signature != SignaturesMap.end()) { + if (Signature->second.first > Files[i].Size) { + ReplacedFiles->emplace_back(std::make_pair(Signature->second.second, + Files[i].Name)); + Files[i].Name = Signature->second.second; + Files[i].Size = Signature->second.first; + } + } } return NewFeatures->size(); } @@ -231,7 +250,12 @@ // So it makes no sense to record all features for all files, instead we // only record features that were not seen before. Set UniqFeatures; + // Collect all coverage edges and features for the given input in order to + // produce a signature. That signature is used later on for replacing the + // inputs in the output corpus if they have reduced variants available. + Vector Signature; TPC.CollectFeatures([&](size_t Feature) { + Signature.push_back(Feature); if (AllFeatures.insert(Feature).second) UniqFeatures.insert(Feature); }); @@ -248,10 +272,17 @@ OF << "\n"; OF << "COV " << i; TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { + Signature.push_back(TPC.PCTableEntryIdx(TE)); if (AllPCs.insert(TE).second) OF << " " << TPC.PCTableEntryIdx(TE); }); OF << "\n"; + OF << "SIGNATURE " << i; + std::sort(Signature.begin(), Signature.end()); + OF << " " + << Hash(reinterpret_cast(Signature.data()), + Signature.size() * sizeof(decltype(Signature)::value_type)); + OF << "\n"; OF.flush(); } PrintStats("DONE "); @@ -280,6 +311,7 @@ const Vector &OldCorpus, const Vector &NewCorpus, Vector *NewFiles, + Vector *ReplacedFiles, const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, @@ -355,10 +387,12 @@ VPrintf(V, "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); - M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); + M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles, + ReplacedFiles); VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " - "%zd new coverage edges\n", - NewFiles->size(), NewFeatures->size(), NewCov->size()); + "%zd new coverage edges; %zd files reduced\n", + NewFiles->size(), NewFeatures->size(), NewCov->size(), + ReplacedFiles->size()); } } // namespace fuzzer Index: lib/fuzzer/FuzzerSHA1.h =================================================================== --- lib/fuzzer/FuzzerSHA1.h +++ lib/fuzzer/FuzzerSHA1.h @@ -27,6 +27,8 @@ std::string Hash(const Unit &U); +std::string Hash(const uint8_t *Data, size_t Len); + } // namespace fuzzer #endif // LLVM_FUZZER_SHA1_H Index: lib/fuzzer/FuzzerSHA1.cpp =================================================================== --- lib/fuzzer/FuzzerSHA1.cpp +++ lib/fuzzer/FuzzerSHA1.cpp @@ -214,8 +214,12 @@ } std::string Hash(const Unit &U) { - uint8_t Hash[kSHA1NumBytes]; - ComputeSHA1(U.data(), U.size(), Hash); + return Hash(U.data(), U.size()); +} + +std::string Hash(const uint8_t *Data, size_t Len) { + uint8_t Hash[kSHA1NumBytes]; + ComputeSHA1(Data, Len, Hash); return Sha1ToString(Hash); } Index: lib/fuzzer/tests/FuzzerUnittest.cpp =================================================================== --- lib/fuzzer/tests/FuzzerUnittest.cpp +++ lib/fuzzer/tests/FuzzerUnittest.cpp @@ -644,9 +644,11 @@ size_t NumNewFeatures) { Merger M; Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; EXPECT_TRUE(M.Parse(Input, true)); - EXPECT_EQ(NumNewFeatures, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(NumNewFeatures, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, Result); } @@ -690,6 +692,7 @@ Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; EXPECT_TRUE(M.Parse("3\n2\nAA\nBB\nC\n" @@ -704,7 +707,8 @@ EQ(M.Files[0].Features, {1, 2, 3}); EQ(M.Files[1].Features, {4, 5, 6}); EQ(M.Files[2].Features, {1, 3, 6}); - EXPECT_EQ(0U, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(0U, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, {}); EXPECT_TRUE(M.Parse("3\n1\nA\nB\nC\n" @@ -715,7 +719,8 @@ EQ(M.Files[0].Features, {1, 2, 3}); EQ(M.Files[1].Features, {4, 5, 6}); EQ(M.Files[2].Features, {1, 3, 6}); - EXPECT_EQ(3U, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(3U, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, {"B"}); // Same as the above, but with InitialFeatures. @@ -729,7 +734,9 @@ InitialFeatures.insert(1); InitialFeatures.insert(2); InitialFeatures.insert(3); - EXPECT_EQ(3U, M.Merge(InitialFeatures, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(3U, + M.Merge(InitialFeatures, &NewFeatures, {}, &NewCov, &NewFiles, + &ReplacedFiles)); EQ(NewFiles, {"B"}); }