Index: compiler-rt/lib/fuzzer/FuzzerDriver.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -867,7 +874,8 @@ } if (Flags.fork) - FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork); + FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork, + Flags.NumCorpuses); if (Flags.merge) Merge(F, Options, Args, *Inputs, Flags.merge_control_file); Index: compiler-rt/lib/fuzzer/FuzzerFlags.def =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFlags.def +++ compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -12,58 +12,81 @@ FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") FUZZER_FLAG_INT(runs, -1, - "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. " + "Number of individual test runs (-1 for infinite runs).") +FUZZER_FLAG_INT( + max_len, 0, + "Maximum length of the test input. " "If 0, libFuzzer tries to guess a good value based on the corpus " "and reports it. ") -FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, " - "then try larger inputs over time. Specifies the rate at which the length " - "limit is increased (smaller == faster). If 0, immediately try inputs with " - "size up to max_len. Default value is 0, if LLVMFuzzerCustomMutator is used.") -FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files " - "to use as an additional seed corpus. Alternatively, an \"@\" followed by " - "the name of a file containing the comma-separated list.") -FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if " - "they do not produce new coverage. When used with |reduce_inputs==1|, the " - "seed inputs will never be reduced. This option can be useful when seeds are" - "not properly formed for the fuzz target but still have useful snippets.") +FUZZER_FLAG_INT( + len_control, 100, + "Try generating small inputs first, " + "then try larger inputs over time. Specifies the rate at which the length " + "limit is increased (smaller == faster). If 0, immediately try inputs " + "with " + "size up to max_len. Default value is 0, if LLVMFuzzerCustomMutator is " + "used.") +FUZZER_FLAG_STRING( + seed_inputs, + "A comma-separated list of input files " + "to use as an additional seed corpus. Alternatively, an \"@\" followed by " + "the name of a file containing the comma-separated list.") +FUZZER_FLAG_INT( + keep_seed, 0, + "If 1, keep seed inputs in the corpus even if " + "they do not produce new coverage. When used with |reduce_inputs==1|, the " + "seed inputs will never be reduced. This option can be useful when seeds " + "are" + "not properly formed for the fuzz target but still have useful snippets.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG_INT(cross_over_uniform_dist, 0, "Experimental. If 1, use a " - "uniform probability distribution when choosing inputs to cross over with. " - "Some of the inputs in the corpus may never get chosen for mutation " - "depending on the input mutation scheduling policy. With this flag, all " - "inputs, regardless of the input mutation scheduling policy, can be chosen " - "as an input to cross over with. This can be particularly useful with " - "|keep_seed==1|; all the initial seed inputs, even though they do not " - "increase coverage because they are not properly formed, will still be " - "chosen as an input to cross over with.") +FUZZER_FLAG_INT( + cross_over_uniform_dist, 0, + "Experimental. If 1, use a " + "uniform probability distribution when choosing inputs to cross over with. " + "Some of the inputs in the corpus may never get chosen for mutation " + "depending on the input mutation scheduling policy. With this flag, all " + "inputs, regardless of the input mutation scheduling policy, can be chosen " + "as an input to cross over with. This can be particularly useful with " + "|keep_seed==1|; all the initial seed inputs, even though they do not " + "increase coverage because they are not properly formed, will still be " + "chosen as an input to cross over with.") FUZZER_FLAG_INT(mutate_depth, 5, - "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG_INT(reduce_depth, 0, "Experimental/internal. " + "Apply this number of consecutive mutations to each input.") +FUZZER_FLAG_INT(reduce_depth, 0, + "Experimental/internal. " "Reduce depth if mutations lose unique features") FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") FUZZER_FLAG_INT(prefer_small, 1, - "If 1, always prefer smaller inputs during the corpus shuffle.") + "If 1, always prefer smaller inputs during the corpus shuffle.") FUZZER_FLAG_INT( timeout, 1200, "Timeout in seconds (if positive). " "If one unit runs more than this number of seconds the process will abort.") -FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug " - "this exit code will be used.") -FUZZER_FLAG_INT(timeout_exitcode, 70, "When libFuzzer reports a timeout " - "this exit code will be used.") -FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " - "time in seconds to run the fuzzer.") +FUZZER_FLAG_INT(error_exitcode, 77, + "When libFuzzer itself reports a bug " + "this exit code will be used.") +FUZZER_FLAG_INT(timeout_exitcode, 70, + "When libFuzzer reports a timeout " + "this exit code will be used.") +FUZZER_FLAG_INT(max_total_time, 0, + "If positive, indicates the maximal total " + "time in seconds to run the fuzzer.") FUZZER_FLAG_INT(help, 0, "Print help.") -FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens " +FUZZER_FLAG_INT( + NumCorpuses, 1, + "FOR fork mode. Divide the main corpus into N parts according to size.") +FUZZER_FLAG_INT(fork, 0, + "Experimental mode where fuzzing happens " "in a subprocess") FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode") FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode") FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode") -FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " - "merged into the 1-st corpus. Only interesting units will be taken. " - "This flag can be used to minimize a corpus.") +FUZZER_FLAG_INT( + merge, 0, + "If 1, the 2-nd, 3-rd, etc corpora will be " + "merged into the 1-st corpus. Only interesting units will be taken. " + "This flag can be used to minimize a corpus.") FUZZER_FLAG_STRING(stop_file, "Stop fuzzing ASAP if this file exists") FUZZER_FLAG_STRING(merge_inner, "internal flag") FUZZER_FLAG_STRING(merge_control_file, Index: compiler-rt/lib/fuzzer/FuzzerFork.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFork.h +++ compiler-rt/lib/fuzzer/FuzzerFork.h @@ -18,7 +18,8 @@ namespace fuzzer { void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, const Vector &Args, - const Vector &CorpusDirs, int NumJobs); + const Vector &CorpusDirs, int NumJobs, + int NumCorpuses); } // namespace fuzzer #endif // LLVM_FUZZER_FORK_H Index: compiler-rt/lib/fuzzer/FuzzerFork.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -95,6 +96,8 @@ Set Features, Cov; Set FilesWithDFT; Vector Files; + // This variable is used to store the size of the seed. + Vector FilesSizes; Random *Rand; std::chrono::system_clock::time_point ProcessStartTime; int Verbosity = 0; @@ -114,16 +116,16 @@ .count(); } - FuzzJob *CreateNewJob(size_t JobId) { + FuzzJob *CreateNewJob(size_t JobId, int NumCorpuses) { Command Cmd(Args); Cmd.removeFlag("fork"); Cmd.removeFlag("runs"); Cmd.removeFlag("collect_data_flow"); for (auto &C : CorpusDirs) // Remove all corpora from the args. Cmd.removeArgument(C); - Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. + Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. Cmd.addFlag("print_final_stats", "1"); - Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. + Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId))); Cmd.addFlag("stop_file", StopFile()); if (!DataFlowBinary.empty()) { @@ -135,11 +137,27 @@ std::string Seeds; if (size_t CorpusSubsetSize = std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { + size_t AverageSize = Files.size() / NumCorpuses + 1; auto Time1 = std::chrono::system_clock::now(); + size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageSize; + // At this time, the seeds in the File variable are sorted according to + // the seed size, so by generating a uniformly distributed random + // number, the seeds are selected in a certain group. for (size_t i = 0; i < CorpusSubsetSize; i++) { - auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; - Seeds += (Seeds.empty() ? "" : ",") + SF; - CollectDFT(SF); + std::random_device rd; + std::mt19937 randomseed(rd()); + std::uniform_int_distribution<> rand(0, AverageSize); + size_t j = rand(randomseed); + size_t m = j + StartIndex; + if (m < Files.size()) { + auto &SF = Files[m]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } else { + auto &SF = Files[rand(randomseed)]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } } auto Time2 = std::chrono::system_clock::now(); auto DftTimeInSeconds = duration_cast(Time2 - Time1).count(); @@ -179,7 +196,7 @@ return Job; } - void RunOneMergeJob(FuzzJob *Job) { + void RunOneMergeJob(FuzzJob *Job, int NumCorpuses) { auto Stats = ParseFinalStatsFromLog(Job->LogPath); NumRuns += Stats.number_of_executed_units; @@ -219,7 +237,12 @@ auto U = FileToVector(Path); auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); - Files.push_back(NewPath); + // Seeds are inserted into Files according to size. + long usz = U.size(); + auto idx = std::upper_bound(FilesSizes.begin(), FilesSizes.end(), usz) - + FilesSizes.begin(); + FilesSizes.insert(FilesSizes.begin() + idx, usz); + Files.insert(Files.begin() + idx, NewPath); } Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); @@ -284,7 +306,8 @@ // This is just a skeleton of an experimental -fork=1 feature. void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, const Vector &Args, - const Vector &CorpusDirs, int NumJobs) { + const Vector &CorpusDirs, int NumJobs, + int NumCorpuses) { Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); GlobalEnv Env; @@ -301,19 +324,20 @@ std::sort(SeedFiles.begin(), SeedFiles.end()); Env.TempDir = TempPath("FuzzWithFork", ".dir"); Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); - RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. + RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. MkDir(Env.TempDir); MkDir(Env.DFTDir); - if (CorpusDirs.empty()) MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C")); else Env.MainCorpusDir = CorpusDirs[0]; if (Options.KeepSeed) { - for (auto &File : SeedFiles) + for (auto &File : SeedFiles) { Env.Files.push_back(File.File); + Env.FilesSizes.push_back(File.Size); + } } else { auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); Set NewFeatures, NewCov; @@ -323,6 +347,11 @@ Env.Cov.insert(NewFeatures.begin(), NewFeatures.end()); RemoveFile(CFPath); } + + for (auto &path : Env.Files) { + Env.FilesSizes.push_back(FileSize(path)); + } + Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); @@ -341,7 +370,7 @@ Vector Threads; for (int t = 0; t < NumJobs; t++) { Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ)); - FuzzQ.Push(Env.CreateNewJob(JobId++)); + FuzzQ.Push(Env.CreateNewJob(JobId++, NumCorpuses)); } while (true) { @@ -356,7 +385,7 @@ } Fuzzer::MaybeExitGracefully(); - Env.RunOneMergeJob(Job.get()); + Env.RunOneMergeJob(Job.get(), NumCorpuses); // Continue if our crash is one of the ignorred ones. if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) @@ -398,10 +427,28 @@ StopJobs(); break; } - - FuzzQ.Push(Env.CreateNewJob(JobId++)); + // Since the number of corpus seeds will gradually increase, in order to + // control the number in each group to be about three times the number of + // seeds selected each time, the number of groups is dynamically adjusted. + if (Env.Files.size() >= 1 && Env.Files.size() < 1600) + NumCorpuses = 16; + if (Env.Files.size() >= 1600 && Env.Files.size() < 3600) + NumCorpuses = 20; + if (Env.Files.size() >= 3600 && Env.Files.size() < 6400) + NumCorpuses = 24; + if (Env.Files.size() >= 6400 && Env.Files.size() < 8100) + NumCorpuses = 32; + if (Env.Files.size() >= 8100 && Env.Files.size() < 12000) + NumCorpuses = 36; + if (Env.Files.size() >= 12000 && Env.Files.size() < 16000) + NumCorpuses = 40; + if (Env.Files.size() >= 16000 && Env.Files.size() < 30000) + NumCorpuses = 60; + if (Env.Files.size() >= 30000) + NumCorpuses = 80; + + FuzzQ.Push(Env.CreateNewJob(JobId++, NumCorpuses)); } - for (auto &T : Threads) T.join();