[libFuzzer] Pass the extra seed corpus through a list file (-seed_inputs_file)

Replaces -seed_inputs=file1,file2,... with -seed_inputs_file=<path>, where
<path> names a file whose contents are the same comma-separated list.

  * FuzzerDriver.cpp reads the list file into memory, deletes the list file,
    and splits its contents on ','.
  * FuzzerFork.cpp writes each job's list under TempDir, next to the job's
    log/corpus/features paths, and passes its path to the child.
  * Tests are switched to the new flag; seed_inputs_file.test additionally
    covers list-file removal, a trailing comma, a missing file and a
    20000-entry list.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy, and the template arguments
(Vector<std::string>, reinterpret_cast<const uint8_t *>) were restored by
inspection -- confirm against the tree (and WriteToFile's signature) before
applying. The first -/+ pair in len_control.test differed only in whitespace
that the collapsed copy did not preserve.
NOTE(review): FuzzerDriver.cpp now uses std::istringstream; confirm <sstream>
is included there.

diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -765,14 +765,14 @@
 
-  // Parse -seed_inputs=file1,file2,...
+  // Parse -seed_inputs_file=<path>; the file holds "file1,file2,...".
   Vector<std::string> ExtraSeedFiles;
-  if (Flags.seed_inputs) {
-    std::string s = Flags.seed_inputs;
-    size_t comma_pos;
-    while ((comma_pos = s.find_last_of(',')) != std::string::npos) {
-      ExtraSeedFiles.push_back(s.substr(comma_pos + 1));
-      s = s.substr(0, comma_pos);
-    }
-    ExtraSeedFiles.push_back(s);
+  if (Flags.seed_inputs_file) {
+    std::string SeedInputsFile(Flags.seed_inputs_file);
+    std::istringstream SeedInputsStream(FileToString(SeedInputsFile));
+    // The list is already in memory, so the list file can be deleted now.
+    RemoveFile(SeedInputsFile);
+    std::string seed_input;
+    while (std::getline(SeedInputsStream, seed_input, ','))
+      ExtraSeedFiles.push_back(seed_input);
   }
 
   F->Loop(*Inputs, ExtraSeedFiles);
diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -20,8 +20,9 @@
   "then try larger inputs over time. Specifies the rate at which the length "
   "limit is increased (smaller == faster). If 0, immediately try inputs with "
   "size up to max_len.")
-FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files "
-  "to use as an additional seed corpus")
+FUZZER_FLAG_STRING(seed_inputs_file, "A file containing a comma-separated list "
+  "of input files to use as an additional seed corpus. "
+  "The file is deleted once it has been read.")
 FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
 FUZZER_FLAG_INT(mutate_depth, 5,
             "Apply this number of consecutive mutations to each input.")
diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -121,8 +121,15 @@
       for (size_t i = 0; i < CorpusSubsetSize; i++)
         Seeds += (Seeds.empty() ? "" : ",") +
                  Files[Rand->SkewTowardsLast(Files.size())];
-    if (!Seeds.empty())
-      Cmd.addFlag("seed_inputs", Seeds);
+    if (!Seeds.empty()) {
+      // Keep the list with the job's other scratch paths under TempDir rather
+      // than in the current working directory.
+      std::string SeedsFile =
+          DirPlusFile(TempDir, "seeds-list." + std::to_string(JobId));
+      WriteToFile(reinterpret_cast<const uint8_t *>(Seeds.c_str()),
+                  Seeds.size(), SeedsFile);
+      Cmd.addFlag("seed_inputs_file", SeedsFile);
+    }
     Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
     Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
     Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
diff --git a/compiler-rt/test/fuzzer/cross_over.test b/compiler-rt/test/fuzzer/cross_over.test
--- a/compiler-rt/test/fuzzer/cross_over.test
+++ b/compiler-rt/test/fuzzer/cross_over.test
@@ -15,4 +15,5 @@
 RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 %t-corpus
 
-# Test the same thing but using -seed_inputs instead of passing the corpus dir.
-RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B
+# Test the same thing but using -seed_inputs_file instead of passing the corpus dir.
+RUN: python -c "import sys; sys.stdout.write(r'%t-corpus/A,%t-corpus/B')" > %t.seed-inputs
+RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 -seed_inputs_file=%t.seed-inputs
diff --git a/compiler-rt/test/fuzzer/len_control.test b/compiler-rt/test/fuzzer/len_control.test
--- a/compiler-rt/test/fuzzer/len_control.test
+++ b/compiler-rt/test/fuzzer/len_control.test
@@ -4,8 +4,9 @@
 LIM4: DONE{{.*}}lim: 4
 LIM77: DONE{{.*}}lim: 77
 LIM20: DONE{{.*}}lim: 20
-RUN: %run %t-SimpleTest -runs=1 2>&1 | FileCheck %s --check-prefix=LIM4
-RUN: %run %t-SimpleTest -seed_inputs=%t-SimpleTest -max_len=77 -runs=1 2>&1 | FileCheck %s --check-prefix=LIM77
+RUN: %run %t-SimpleTest -runs=1 2>&1 | FileCheck %s --check-prefix=LIM4
+RUN: python -c "import sys; sys.stdout.write(r'%t-SimpleTest')" > %t.seed-inputs
+RUN: %run %t-SimpleTest -seed_inputs_file=%t.seed-inputs -max_len=77 -runs=1 2>&1 | FileCheck %s --check-prefix=LIM77
 RUN: echo -n 01234567890123456789 > %t-temp
-RUN: %run %t-SimpleTest -seed_inputs=%t-temp -runs=1 2>&1 | FileCheck %s --check-prefix=LIM20
-
+RUN: python -c "import sys; sys.stdout.write(r'%t-temp')" > %t.seed-inputs
+RUN: %run %t-SimpleTest -seed_inputs_file=%t.seed-inputs -runs=1 2>&1 | FileCheck %s --check-prefix=LIM20
diff --git a/compiler-rt/test/fuzzer/seed_inputs_file.test b/compiler-rt/test/fuzzer/seed_inputs_file.test
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/fuzzer/seed_inputs_file.test
@@ -0,0 +1,17 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+
+USE-2: INFO: seed corpus: files: 2
+RUN: python -c "import sys; sys.stdout.write(r'%t-SimpleTest,%t-SimpleTest')" > %t.seed-inputs
+RUN: %run %t-SimpleTest -runs=1 -seed_inputs_file=%t.seed-inputs 2>&1 | FileCheck %s --check-prefix=USE-2
+# Ensure seed inputs file is removed after use.
+RUN: not cat %t.seed-inputs
+
+# Test that missing files and trailing commas are tolerated.
+RUN: python -c "import sys; sys.stdout.write(r'%t-SimpleTest,%t-SimpleTest,nonexistent-file,')" > %t.seed-inputs
+RUN: %run %t-SimpleTest -runs=1 -seed_inputs_file=%t.seed-inputs 2>&1 | FileCheck %s --check-prefix=USE-2
+
+# Test that libFuzzer can handle many seed corpus files.
+USE-20000: INFO: seed corpus: files: 20000
+RUN: echo "hi" > "%t-seed-file"
+RUN: python -c "import sys; sys.stdout.write(r'%t-seed-file,' * 20000)" > %t.seed-inputs
+RUN: %run %t-SimpleTest -runs=100 -seed_inputs_file=%t.seed-inputs 2>&1 | FileCheck %s --check-prefix=USE-20000