Index: lib/fuzzer/FuzzerDefs.h =================================================================== --- lib/fuzzer/FuzzerDefs.h +++ lib/fuzzer/FuzzerDefs.h @@ -15,10 +15,13 @@ #include #include #include +#include +#include +#include #include #include -#include -#include +#include + // Platform detection. #ifdef __linux__ @@ -191,6 +194,11 @@ template using Set = std::set, fuzzer_allocator>; +template +using Map = + std::map, fuzzer_allocator>>; + +typedef std::pair FilePair; typedef Vector Unit; typedef Vector UnitVector; typedef int (*UserCallback)(const uint8_t *Data, size_t Size); Index: lib/fuzzer/FuzzerDriver.cpp =================================================================== --- lib/fuzzer/FuzzerDriver.cpp +++ lib/fuzzer/FuzzerDriver.cpp @@ -489,11 +489,16 @@ std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath(".txt"); Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; - CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, - {}, &NewCov, CFPath, true); + CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, &ReplacedFiles, {}, + &NewFeatures, {}, &NewCov, CFPath, true); for (auto &Path : NewFiles) F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen)); + for (auto &Pair : ReplacedFiles) { + F->WriteToOutputCorpus(FileToVector(Pair.second, Options.MaxLen)); + RemoveFile(Pair.first); + } // We are done, delete the control file if it was a temporary one. if (!Flags.merge_control_file) RemoveFile(CFPath); Index: lib/fuzzer/FuzzerFork.cpp =================================================================== --- lib/fuzzer/FuzzerFork.cpp +++ lib/fuzzer/FuzzerFork.cpp @@ -210,15 +210,18 @@ if (MergeCandidates.empty()) return; Vector FilesToAdd; + Vector FilesToReplace; Set NewFeatures, NewCov; - CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, - &NewFeatures, Cov, &NewCov, Job->CFPath, false); + CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, &FilesToReplace, + Features, &NewFeatures, Cov, &NewCov, Job->CFPath, + false); for (auto &Path : FilesToAdd) { auto U = FileToVector(Path); auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); Files.push_back(NewPath); } + // TODO(Dor1s): go through FilesToReplace here. Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); for (auto Idx : NewCov) @@ -226,10 +229,8 @@ if (TPC.PcIsFuncEntry(TE)) PrintPC(" NEW_FUNC: %p %F %L\n", "", TPC.GetNextInstructionPc(TE->PC)); - } - void CollectDFT(const std::string &InputPath) { if (DataFlowBinary.empty()) return; if (!FilesWithDFT.insert(InputPath).second) return; @@ -310,13 +311,13 @@ Env.MainCorpusDir = CorpusDirs[0]; auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, - {}, &Env.Cov, - CFPath, false); + Vector FilesToReplace; + CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, &FilesToReplace, {}, + &Env.Features, {}, &Env.Cov, CFPath, false); RemoveFile(CFPath); Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); - + // TODO(Dor1s): think if anything has to be done with FilesToReplace. int ExitCode = 0; JobQueue FuzzQ, MergeQ; Index: lib/fuzzer/FuzzerMerge.h =================================================================== --- lib/fuzzer/FuzzerMerge.h +++ lib/fuzzer/FuzzerMerge.h @@ -52,6 +52,7 @@ std::string Name; size_t Size = 0; Vector Features, Cov; + std::string Signature; }; struct Merger { @@ -65,7 +66,8 @@ void ParseOrExit(std::istream &IS, bool ParseCoverage); size_t Merge(const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Set *NewCov, - Vector *NewFiles); + Vector *NewFiles, + Vector *ReplacedFiles); size_t ApproximateMemoryConsumption() const; Set AllFeatures() const; }; @@ -74,6 +76,7 @@ const Vector &OldCorpus, const Vector &NewCorpus, Vector *NewFiles, + Vector *ReplacedFiles, const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Index: lib/fuzzer/FuzzerMerge.cpp =================================================================== --- lib/fuzzer/FuzzerMerge.cpp +++ lib/fuzzer/FuzzerMerge.cpp @@ -82,6 +82,7 @@ std::istringstream ISS1(Line); std::string Marker; size_t N; + std::string Signature; ISS1 >> Marker; ISS1 >> N; if (Marker == "STARTED") { @@ -111,6 +112,9 @@ while (ISS1 >> N) if (PCs.insert(N).second) Files[CurrentFileIdx].Cov.push_back(N); + } else if (Marker == "SIGNATURE") { + ISS1 >> Signature; + Files[N].Signature = Signature; } else { return false; } @@ -134,15 +138,18 @@ size_t Merger::Merge(const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, Set *NewCov, - Vector *NewFiles) { + Vector *NewFiles, + Vector *ReplacedFiles) { NewFiles->clear(); assert(NumFilesInFirstCorpus <= Files.size()); Set AllFeatures = InitialFeatures; + Map> SignaturesMap; // What features are in the initial corpus? for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); + auto &Cur = Files[i]; + AllFeatures.insert(Cur.Features.begin(), Cur.Features.end()); + SignaturesMap[Cur.Signature] = std::make_pair(Cur.Size, Cur.Name); } // Remove all features that we already know from all other inputs. for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { @@ -165,6 +172,8 @@ // One greedy pass: add the file's features to AllFeatures. // If new features were added, add this file to NewFiles. + // Check files' signatures and sizes, replace equivalent files with reduced + // variants. for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { auto &Cur = Files[i].Features; // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), @@ -181,6 +190,16 @@ for (auto Cov : Files[i].Cov) if (InitialCov.find(Cov) == InitialCov.end()) NewCov->insert(Cov); + + auto Signature = SignaturesMap.find(Files[i].Signature); + if (Signature != SignaturesMap.end()) { + if (Signature->second.first > Files[i].Size) { + ReplacedFiles->emplace_back(std::make_pair(Signature->second.second, + Files[i].Name)); + Files[i].Name = Signature->second.second; + Files[i].Size = Signature->second.first; + } + } } return NewFeatures->size(); } @@ -234,7 +253,12 @@ // So it makes no sense to record all features for all files, instead we // only record features that were not seen before. Set UniqFeatures; + // Collect all coverage edges and features for the given input in order to + // produce a signature. That signature is used later on for replacing the + // inputs in the output corpus if they have reduced variants available. + Vector Signature; TPC.CollectFeatures([&](size_t Feature) { + Signature.push_back(Feature); if (AllFeatures.insert(Feature).second) UniqFeatures.insert(Feature); }); @@ -251,10 +275,17 @@ OF << "\n"; OF << "COV " << i; TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { + Signature.push_back(TPC.PCTableEntryIdx(TE)); if (AllPCs.insert(TE).second) OF << " " << TPC.PCTableEntryIdx(TE); }); OF << "\n"; + OF << "SIGNATURE " << i; + std::sort(Signature.begin(), Signature.end()); + OF << " " + << Hash(reinterpret_cast(Signature.data()), + Signature.size() * sizeof(decltype(Signature)::value_type)); + OF << "\n"; OF.flush(); } PrintStatsWrapper("DONE "); @@ -283,6 +314,7 @@ const Vector &OldCorpus, const Vector &NewCorpus, Vector *NewFiles, + Vector *ReplacedFiles, const Set &InitialFeatures, Set *NewFeatures, const Set &InitialCov, @@ -358,10 +390,12 @@ VPrintf(V, "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); - M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); + M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles, + ReplacedFiles); VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " - "%zd new coverage edges\n", - NewFiles->size(), NewFeatures->size(), NewCov->size()); + "%zd new coverage edges; %zd files reduced\n", + NewFiles->size(), NewFeatures->size(), NewCov->size(), + ReplacedFiles->size()); } } // namespace fuzzer Index: lib/fuzzer/FuzzerSHA1.h =================================================================== --- lib/fuzzer/FuzzerSHA1.h +++ lib/fuzzer/FuzzerSHA1.h @@ -27,6 +27,8 @@ std::string Hash(const Unit &U); +std::string Hash(const uint8_t *Data, size_t Len); + } // namespace fuzzer #endif // LLVM_FUZZER_SHA1_H Index: lib/fuzzer/FuzzerSHA1.cpp =================================================================== --- lib/fuzzer/FuzzerSHA1.cpp +++ lib/fuzzer/FuzzerSHA1.cpp @@ -214,8 +214,12 @@ } std::string Hash(const Unit &U) { - uint8_t Hash[kSHA1NumBytes]; - ComputeSHA1(U.data(), U.size(), Hash); + return Hash(U.data(), U.size()); +} + +std::string Hash(const uint8_t *Data, size_t Len) { + uint8_t Hash[kSHA1NumBytes]; + ComputeSHA1(Data, Len, Hash); return Sha1ToString(Hash); } Index: lib/fuzzer/tests/FuzzerUnittest.cpp =================================================================== --- lib/fuzzer/tests/FuzzerUnittest.cpp +++ lib/fuzzer/tests/FuzzerUnittest.cpp @@ -644,9 +644,11 @@ size_t NumNewFeatures) { Merger M; Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; EXPECT_TRUE(M.Parse(Input, true)); - EXPECT_EQ(NumNewFeatures, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(NumNewFeatures, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, Result); } @@ -690,6 +692,7 @@ Vector NewFiles; + Vector ReplacedFiles; Set NewFeatures, NewCov; EXPECT_TRUE(M.Parse("3\n2\nAA\nBB\nC\n" @@ -704,7 +707,8 @@ EQ(M.Files[0].Features, {1, 2, 3}); EQ(M.Files[1].Features, {4, 5, 6}); EQ(M.Files[2].Features, {1, 3, 6}); - EXPECT_EQ(0U, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(0U, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, {}); EXPECT_TRUE(M.Parse("3\n1\nA\nB\nC\n" @@ -715,7 +719,8 @@ EQ(M.Files[0].Features, {1, 2, 3}); EQ(M.Files[1].Features, {4, 5, 6}); EQ(M.Files[2].Features, {1, 3, 6}); - EXPECT_EQ(3U, M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(3U, + M.Merge({}, &NewFeatures, {}, &NewCov, &NewFiles, &ReplacedFiles)); EQ(NewFiles, {"B"}); // Same as the above, but with InitialFeatures. @@ -729,7 +734,9 @@ InitialFeatures.insert(1); InitialFeatures.insert(2); InitialFeatures.insert(3); - EXPECT_EQ(3U, M.Merge(InitialFeatures, &NewFeatures, {}, &NewCov, &NewFiles)); + EXPECT_EQ(3U, + M.Merge(InitialFeatures, &NewFeatures, {}, &NewCov, &NewFiles, + &ReplacedFiles)); EQ(NewFiles, {"B"}); } Index: test/fuzzer/merge-sigusr.test =================================================================== --- test/fuzzer/merge-sigusr.test +++ test/fuzzer/merge-sigusr.test @@ -32,8 +32,9 @@ RUN: cat %t/log | FileCheck %s --dump-input=fail RUN: grep C2/g %t/MCF RUN: grep STARTED %t/MCF -RUN: tail -n 2 %t/MCF | grep FT -RUN: tail -n 1 %t/MCF | grep COV +RUN: tail -n 3 %t/MCF | grep FT +RUN: tail -n 2 %t/MCF | grep COV +RUN: tail -n 1 %t/MCF | grep SIGNATURE CHECK: INFO: signal received, trying to exit gracefully CHECK: INFO: libFuzzer: exiting as requested Index: test/fuzzer/merge.test =================================================================== --- test/fuzzer/merge.test +++ test/fuzzer/merge.test @@ -4,31 +4,41 @@ RUN: rm -rf %t/T0 %t/T1 %t/T2 RUN: mkdir -p %t/T0 %t/T1 %t/T2 -RUN: echo F..... > %t/T0/1 -RUN: echo .U.... > %t/T0/2 -RUN: echo ..Z... > %t/T0/3 +RUN: echo ......... > %t/T0/1 +RUN: echo F........ > %t/T0/2 +RUN: echo .U....... > %t/T0/3 +RUN: echo ..Z...... > %t/T0/4 + +# Intentionally put an extra byte in the following unit as it should be replaced +# by %t/T2/1 unit later on. Also, an extra byte here warrants that this input +# will be processed the last when going through T0 (or T1) directory. +RUN: echo ...Z...... > %t/T0/5 -# T1 has 3 elements, T2 is empty. +# T1 has 4 elements, T2 is empty. RUN: cp %t/T0/* %t/T1/ RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK1 -CHECK1: MERGE-OUTER: 3 files, 3 in the initial corpus +CHECK1: MERGE-OUTER: 5 files, 5 in the initial corpus CHECK1: MERGE-OUTER: 0 new files with 0 new features added -RUN: echo ...Z.. > %t/T2/1 -RUN: echo ....E. > %t/T2/2 -RUN: echo .....R > %t/T2/3 -RUN: echo F..... > %t/T2/a -RUN: echo .U.... > %t/T2/b -RUN: echo ..Z... > %t/T2/c +# The input below is intentionally one byte shorter than the others, as it is +# expected to replace %t/T1/5 (copied from %t/T0/5) because it gives the same +# coverage while being smaller in size. Also, the smaller size of this input +# warrants that it will be processed first when going through the T2 directory. +RUN: echo ...Z.... > %t/T2/1 +RUN: echo ....E.... > %t/T2/2 +RUN: echo .....R... > %t/T2/3 +RUN: echo F........ > %t/T2/a +RUN: echo .U....... > %t/T2/b +RUN: echo ..Z...... > %t/T2/c -# T1 has 3 elements, T2 has 6 elements, only 3 are new. +# T1 has 5 elements, T2 has 6 elements, 2 are new and 1 is a reduced variant. RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK2 -CHECK2: MERGE-OUTER: 9 files, 3 in the initial corpus -CHECK2: MERGE-OUTER: 3 new files with 3 new features added +CHECK2: MERGE-OUTER: 11 files, 5 in the initial corpus +CHECK2: MERGE-OUTER: 2 new files with 2 new features added; 2 new coverage edges; 1 files reduced # Now, T1 has 6 units and T2 has no new interesting units. RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK3 -CHECK3: MERGE-OUTER: 12 files, 6 in the initial corpus +CHECK3: MERGE-OUTER: 13 files, 7 in the initial corpus CHECK3: MERGE-OUTER: 0 new files with 0 new features added # Check that we respect max_len during the merge and don't crash. @@ -36,7 +46,7 @@ RUN: cp %t/T0/* %t/T1/ RUN: echo looooooooong > %t/T2/looooooooong RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -max_len=6 2>&1 | FileCheck %s --check-prefix=MAX_LEN -MAX_LEN: MERGE-OUTER: 3 new files +MAX_LEN: MERGE-OUTER: 2 new files with 2 new features added; 2 new coverage edges; 0 files reduced # Check that we respect -merge_control_file=FILE RUN: rm %t/T1/* @@ -46,7 +56,7 @@ RUN: grep STARTED %t/MCF RUN: grep FT %t/MCF MCF: MERGE-INNER: using the control file {{.*}}MCF -MCF: MERGE-OUTER: 3 new files +MCF: MERGE-OUTER: 2 new files with 2 new features added; 2 new coverage edges; 1 files reduced # Check that merge tolerates failures. @@ -55,7 +65,7 @@ RUN: echo 'FUZZER' > %t/T2/FUZZER RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=MERGE_WITH_CRASH MERGE_WITH_CRASH: MERGE-OUTER: succesfull in 2 attempt(s) -MERGE_WITH_CRASH: MERGE-OUTER: 3 new files +MERGE_WITH_CRASH: MERGE-OUTER: 2 new files with 2 new features added; 2 new coverage edges; 0 files reduced # Check that we actually limit the size with max_len RUN: rm %t/T1/* %t/T2/*