diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -870,6 +870,7 @@
     exit(0);
   }
 
+  Options.ForkCorpusGroups = Flags.fork_corpus_groups;
   if (Flags.fork)
     FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
 
diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -58,6 +58,10 @@
 FUZZER_FLAG_INT(help, 0, "Print help.")
 FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens "
                 "in a subprocess")
+FUZZER_FLAG_INT(fork_corpus_groups, 0, "For fork mode, enable the corpus-group "
+  "strategy. The main corpus will be grouped according to size, "
+  "and each sub-process will randomly select seeds from different "
+  "groups as the sub-corpus.")
 FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode")
 FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode")
 FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")
diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -95,9 +95,12 @@
   std::set<uint32_t> Features, Cov;
   std::set<std::string> FilesWithDFT;
   std::vector<std::string> Files;
+  std::vector<std::size_t> FilesSizes; // Parallel to Files when Group is set.
   Random *Rand;
   std::chrono::system_clock::time_point ProcessStartTime;
   int Verbosity = 0;
+  int Group = 0;       // Non-zero: pick each job's seeds from one size group.
+  int NumCorpuses = 8; // Number of groups that Files is divided into.
 
   size_t NumTimeouts = 0;
   size_t NumOOMs = 0;
@@ -136,10 +139,24 @@
     if (size_t CorpusSubsetSize =
             std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
       auto Time1 = std::chrono::system_clock::now();
-      for (size_t i = 0; i < CorpusSubsetSize; i++) {
-        auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
-        Seeds += (Seeds.empty() ? "" : ",") + SF;
-        CollectDFT(SF);
+      if (Group) { // Draw seeds from the slice of Files picked by JobId.
+        size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
+        size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
+        for (size_t i = 0; i < CorpusSubsetSize; i++) {
+          size_t RandNum = (*Rand)(AverageCorpusSize);
+          size_t Index = RandNum + StartIndex;
+          Index = Index < Files.size() ? Index
+                                       : Rand->SkewTowardsLast(Files.size());
+          auto &SF = Files[Index];
+          Seeds += (Seeds.empty() ? "" : ",") + SF;
+          CollectDFT(SF);
+        }
+      } else {
+        for (size_t i = 0; i < CorpusSubsetSize; i++) {
+          auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
+          Seeds += (Seeds.empty() ? "" : ",") + SF;
+          CollectDFT(SF);
+        }
       }
       auto Time2 = std::chrono::system_clock::now();
       auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
@@ -222,7 +239,16 @@
       auto U = FileToVector(Path);
       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
       WriteToFile(U, NewPath);
-      Files.push_back(NewPath);
+      if (Group) { // Insert the new unit at its size-ordered position.
+        size_t UnitSize = U.size();
+        auto Idx =
+            std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
+            FilesSizes.begin();
+        FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
+        Files.insert(Files.begin() + Idx, NewPath);
+      } else {
+        Files.push_back(NewPath);
+      }
     }
     Features.insert(NewFeatures.begin(), NewFeatures.end());
     Cov.insert(NewCov.begin(), NewCov.end());
@@ -231,10 +257,8 @@
         if (TPC.PcIsFuncEntry(TE))
           PrintPC(" NEW_FUNC: %p %F %L\n", "",
                   TPC.GetNextInstructionPc(TE->PC));
-
   }
 
-
   void CollectDFT(const std::string &InputPath) {
     if (DataFlowBinary.empty()) return;
     if (!FilesWithDFT.insert(InputPath).second) return;
@@ -297,6 +321,7 @@
   Env.Verbosity = Options.Verbosity;
   Env.ProcessStartTime = std::chrono::system_clock::now();
   Env.DataFlowBinary = Options.CollectDataFlow;
+  Env.Group = Options.ForkCorpusGroups;
 
   std::vector<SizedFile> SeedFiles;
   for (auto &Dir : CorpusDirs)
@@ -327,6 +352,12 @@
     Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
     RemoveFile(CFPath);
   }
+
+  if (Env.Group) {
+    for (auto &path : Env.Files)
+      Env.FilesSizes.push_back(FileSize(path));
+  }
+
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());
 
@@ -341,6 +372,8 @@
       WriteToFile(Unit({1}), Env.StopFile());
   };
 
+  size_t MergeCycle = 20;
+  size_t JobExecuted = 0;
   size_t JobId = 1;
   std::vector<std::thread> Threads;
   for (int t = 0; t < NumJobs; t++) {
@@ -362,6 +395,45 @@
 
     Env.RunOneMergeJob(Job.get());
 
+    // Every MergeCycle jobs, rebuild Env.Files from the corpus directories.
+    JobExecuted++;
+    if (Env.Group && JobExecuted >= MergeCycle) {
+      std::vector<SizedFile> CurrentSeedFiles;
+      for (auto &Dir : CorpusDirs)
+        GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
+      std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
+
+      auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
+      std::set<uint32_t> TmpNewFeatures, TmpNewCov;
+      std::set<uint32_t> TmpFeatures, TmpCov;
+      Env.Files.clear();
+      Env.FilesSizes.clear();
+      CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
+                          TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
+                          CFPath, false);
+      for (auto &path : Env.Files)
+        Env.FilesSizes.push_back(FileSize(path));
+      RemoveFile(CFPath);
+      JobExecuted = 0;
+      MergeCycle += 5;
+    }
+
+    // Since the number of corpus seeds will gradually increase, in order to
+    // control the number in each group to be about three times the number of
+    // seeds selected each time, the number of groups is dynamically adjusted.
+    if (Env.Files.size() < 2000)
+      Env.NumCorpuses = 12;
+    else if (Env.Files.size() < 6000)
+      Env.NumCorpuses = 20;
+    else if (Env.Files.size() < 12000)
+      Env.NumCorpuses = 32;
+    else if (Env.Files.size() < 16000)
+      Env.NumCorpuses = 40;
+    else if (Env.Files.size() < 24000)
+      Env.NumCorpuses = 60;
+    else
+      Env.NumCorpuses = 80;
+
     // Continue if our crash is one of the ignored ones.
     if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
       Env.NumTimeouts++;
diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -47,6 +47,7 @@
   int ReportSlowUnits = 10;
   bool OnlyASCII = false;
   bool Entropic = true;
+  bool ForkCorpusGroups = false;
   size_t EntropicFeatureFrequencyThreshold = 0xFF;
   size_t EntropicNumberOfRarestFeatures = 100;
   bool EntropicScalePerExecTime = false;
diff --git a/compiler-rt/test/fuzzer/fork_corpus_groups.test b/compiler-rt/test/fuzzer/fork_corpus_groups.test
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/fuzzer/fork_corpus_groups.test
@@ -0,0 +1,21 @@
+# UNSUPPORTED: darwin, freebsd, aarch64
+BINGO: BINGO
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: not %run %t-SimpleTest -fork=1 -fork_corpus_groups=1 2>&1 | FileCheck %s --check-prefix=BINGO
+
+TIMEOUT: ERROR: libFuzzer: timeout
+RUN: %cpp_compiler %S/TimeoutTest.cpp -o %t-TimeoutTest
+RUN: not %run %t-TimeoutTest -fork=1 -fork_corpus_groups=1 -timeout=1 -ignore_timeouts=0 2>&1 | FileCheck %s --check-prefix=TIMEOUT
+
+OOM: ERROR: libFuzzer: out-of-memory
+RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
+RUN: not %run %t-OutOfMemoryTest -fork=1 -fork_corpus_groups=1 -ignore_ooms=0 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=OOM
+
+# access-violation is the error thrown on Windows. Address will be smaller on i386.
+CRASH: {{SEGV|access-violation}} on unknown address 0x00000000
+RUN: %cpp_compiler %S/ShallowOOMDeepCrash.cpp -o %t-ShallowOOMDeepCrash
+RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=CRASH
+
+MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon
+MAX_TOTAL_TIME: INFO: exiting: {{.*}} time:
+RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME