diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -869,6 +869,7 @@ exit(0); } + Options.ForkCorpusGroups = Flags.fork_corpus_groups; if (Flags.fork) FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork); diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -58,6 +58,10 @@ FUZZER_FLAG_INT(help, 0, "Print help.") FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens " "in a subprocess") +FUZZER_FLAG_INT(fork_corpus_groups, 1, "For fork mode, enable the corpus-group " + "strategy, The main corpus will be grouped according to size, " + "and each sub-process will randomly select seeds from different " + "groups as the sub-corpus.") FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode") FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode") FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode") diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp --- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -95,9 +95,12 @@ std::set Features, Cov; std::set FilesWithDFT; std::vector Files; + std::vector FilesSizes; Random *Rand; std::chrono::system_clock::time_point ProcessStartTime; int Verbosity = 0; + int Group = 0; + int NumCorpuses = 8; size_t NumTimeouts = 0; size_t NumOOMs = 0; @@ -136,10 +139,31 @@ if (size_t CorpusSubsetSize = std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { auto Time1 = std::chrono::system_clock::now(); - for (size_t i = 0; i < CorpusSubsetSize; i++) { - auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; - Seeds += (Seeds.empty() ? "" : ",") + SF; - CollectDFT(SF); + if (Group) { // whether to group the corpus. + size_t AverageCorpusSize = Files.size() / NumCorpuses + 1; + size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize; + for (size_t i = 0; i < CorpusSubsetSize; i++) { + std::random_device rd; + std::mt19937 RandomSeed(rd()); + std::uniform_int_distribution<> rand(0, AverageCorpusSize); + size_t RandNum = rand(RandomSeed); + size_t Index = RandNum + StartIndex; + if (Index < Files.size()) { + auto &SF = Files[Index]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } else { + auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } + } + } else { + for (size_t i = 0; i < CorpusSubsetSize; i++) { + auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } } auto Time2 = std::chrono::system_clock::now(); auto DftTimeInSeconds = duration_cast(Time2 - Time1).count(); @@ -219,7 +243,15 @@ auto U = FileToVector(Path); auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); - Files.push_back(NewPath); + if (Group) { // Insert the queue according to the size of the seed. + long usz = U.size(); + auto idx = std::upper_bound(FilesSizes.begin(), FilesSizes.end(), usz) - + FilesSizes.begin(); + FilesSizes.insert(FilesSizes.begin() + idx, usz); + Files.insert(Files.begin() + idx, NewPath); + } else { + Files.push_back(NewPath); + } } Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); @@ -228,10 +260,8 @@ if (TPC.PcIsFuncEntry(TE)) PrintPC(" NEW_FUNC: %p %F %L\n", "", TPC.GetNextInstructionPc(TE->PC)); - } - void CollectDFT(const std::string &InputPath) { if (DataFlowBinary.empty()) return; if (!FilesWithDFT.insert(InputPath).second) return; @@ -294,6 +324,7 @@ Env.Verbosity = Options.Verbosity; Env.ProcessStartTime = std::chrono::system_clock::now(); Env.DataFlowBinary = Options.CollectDataFlow; + Env.Group = Options.ForkCorpusGroups; std::vector SeedFiles; for (auto &Dir : CorpusDirs) @@ -312,8 +343,10 @@ Env.MainCorpusDir = CorpusDirs[0]; if (Options.KeepSeed) { - for (auto &File : SeedFiles) + for (auto &File : SeedFiles) { Env.Files.push_back(File.File); + Env.FilesSizes.push_back(File.Size); + } } else { auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); std::set NewFeatures, NewCov; @@ -323,6 +356,13 @@ Env.Cov.insert(NewFeatures.begin(), NewFeatures.end()); RemoveFile(CFPath); } + + if (Env.Group) { + for (auto &path : Env.Files) { + Env.FilesSizes.push_back(FileSize(path)); + } + } + Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); @@ -337,6 +377,8 @@ WriteToFile(Unit({1}), Env.StopFile()); }; + size_t MergeCycle = 20; + size_t JobExecuted = 0; size_t JobId = 1; std::vector Threads; for (int t = 0; t < NumJobs; t++) { @@ -358,6 +400,50 @@ Env.RunOneMergeJob(Job.get()); + // merge the corpus . + JobExecuted++; + if (Env.Group && JobExecuted >= MergeCycle) { + std::vector CurrentSeedFiles; + for (auto &Dir : CorpusDirs) + GetSizedFilesFromDir(Dir, &CurrentSeedFiles); + std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end()); + + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); + std::set TmpNewFeatures, TmpNewCov; + std::set TmpFeatures, TmpCov; + Env.Files.clear(); + Env.FilesSizes.clear(); + CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files, + TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov, + CFPath, false); + for (auto &path : Env.Files) { + Env.FilesSizes.push_back(FileSize(path)); + } + RemoveFile(CFPath); + JobExecuted = 0; + MergeCycle += 5; + } + + // Since the number of corpus seeds will gradually increase, in order to + // control the number in each group to be about three times the number of + // seeds selected each time, the number of groups is dynamically adjusted. + if (Env.Files.size() >= 1 && Env.Files.size() < 1600) + Env.NumCorpuses = 16; + if (Env.Files.size() >= 1600 && Env.Files.size() < 3600) + Env.NumCorpuses = 20; + if (Env.Files.size() >= 3600 && Env.Files.size() < 6400) + Env.NumCorpuses = 24; + if (Env.Files.size() >= 6400 && Env.Files.size() < 8100) + Env.NumCorpuses = 32; + if (Env.Files.size() >= 8100 && Env.Files.size() < 12000) + Env.NumCorpuses = 36; + if (Env.Files.size() >= 12000 && Env.Files.size() < 16000) + Env.NumCorpuses = 40; + if (Env.Files.size() >= 16000 && Env.Files.size() < 30000) + Env.NumCorpuses = 60; + if (Env.Files.size() >= 30000) + Env.NumCorpuses = 80; + // Continue if our crash is one of the ignorred ones. if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) Env.NumTimeouts++; diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -47,6 +47,7 @@ int ReportSlowUnits = 10; bool OnlyASCII = false; bool Entropic = true; + bool ForkCorpusGroups = true; size_t EntropicFeatureFrequencyThreshold = 0xFF; size_t EntropicNumberOfRarestFeatures = 100; bool EntropicScalePerExecTime = false;