diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -33,6 +33,7 @@ // Stats. size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; + bool NeverReduce = false; bool MayDeleteFile = false; bool Reduced = false; bool HasFocusFunction = false; @@ -177,7 +178,7 @@ bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, - bool HasFocusFunction, + bool HasFocusFunction, bool NeverReduce, const Vector &FeatureSet, const DataFlowTrace &DFT, const InputInfo *BaseII) { assert(!U.empty()); @@ -187,6 +188,7 @@ InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; + II.NeverReduce = NeverReduce; II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -656,6 +656,7 @@ Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.LenControl = Flags.len_control; + Options.KeepSeed = Flags.keep_seed; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = Flags.error_exitcode; Options.TimeoutExitCode = Flags.timeout_exitcode; diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -23,6 +23,10 @@ FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files " "to use as an additional seed corpus. Alternatively, an \"@\" followed by " "the name of a file containing the comma-separated list.") +FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if " + "they do not produce new coverage. When used with |reduce_inputs==1|, the " + "seed inputs will never be reduced. This option can be useful when seeds are" + "not properly formed for the fuzz target but still have useful snippets.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp --- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -309,11 +309,15 @@ else Env.MainCorpusDir = CorpusDirs[0]; - auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, - {}, &Env.Cov, - CFPath, false); - RemoveFile(CFPath); + if (Options.KeepSeed) { + for (auto &File : SeedFiles) + Env.Files.push_back(File.File); + } else { + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); + CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, + {}, &Env.Cov, CFPath, false); + RemoveFile(CFPath); + } Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -67,7 +67,8 @@ void ExecuteCallback(const uint8_t *Data, size_t Size); bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, - InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + InputInfo *II = nullptr, bool SeedInput = false, + bool *FoundUniqFeatures = nullptr); // Merge Corpora[1:] into Corpora[0]. void Merge(const Vector &Corpora); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -464,7 +464,8 @@ } bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, - InputInfo *II, bool *FoundUniqFeatures) { + InputInfo *II, bool ForceAddToCorpus, + bool *FoundUniqFeatures) { if (!Size) return false; @@ -478,7 +479,7 @@ UniqFeatureSetTmp.push_back(Feature); if (Options.Entropic) Corpus.UpdateFeatureFrequency(II, Feature); - if (Options.ReduceInputs && II) + if (Options.ReduceInputs && II && !II->NeverReduce) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) FoundUniqFeaturesOfII++; @@ -487,11 +488,12 @@ *FoundUniqFeatures = FoundUniqFeaturesOfII; PrintPulseAndReportSlowInput(Data, Size); size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; - if (NumNewFeatures) { + if (NumNewFeatures || ForceAddToCorpus) { TPC.UpdateObservedPCs(); - auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, - MayDeleteFile, TPC.ObservedFocusFunction(), - UniqFeatureSetTmp, DFT, II); + auto NewII = + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + TPC.ObservedFocusFunction(), ForceAddToCorpus, + UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); return true; @@ -700,7 +702,7 @@ bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, - &FoundUniqFeatures); + /*ForceAddToCorpus*/ false, &FoundUniqFeatures); TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); if (NewCov) { @@ -768,7 +770,9 @@ for (auto &SF : CorporaFiles) { auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false); assert(U.size() <= MaxInputLen); - RunOne(U.data(), U.size()); + RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr, + /*ForceAddToCorpus*/ Options.KeepSeed, + /*FoundUniqFeatures*/ nullptr); CheckExitOnSrcPosOrItem(); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -18,6 +18,7 @@ int Verbosity = 1; size_t MaxLen = 0; size_t LenControl = 1000; + bool KeepSeed = false; int UnitTimeoutSec = 300; int TimeoutExitCode = 70; int OOMExitCode = 71; diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -597,8 +597,10 @@ size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{static_cast(i)}, 1, false, false, {}, DFT, - nullptr); + C->AddToCorpus(Unit{static_cast(i)}, /*NumFeatures*/ 1, + /*MayDeleteFile*/ false, /*HasFocusFunction*/ false, + /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT, + /*BaseII*/ nullptr); Vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { diff --git a/compiler-rt/test/fuzzer/KeepSeedTest.cpp b/compiler-rt/test/fuzzer/KeepSeedTest.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/fuzzer/KeepSeedTest.cpp @@ -0,0 +1,37 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input +// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes longer time to trigger +// find the desired string, because the seed input is more likely to be reduced +// to a prefix of the given input first, losing useful fragments towards the end +// of the seed input. +#include +#include +#include +#include + +static volatile int Sink = 0; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 17) + return 0; + + if (Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' && + Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T') { + if (Size >= 7 && Data[6] == ' ') { + if (Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' && + Data[10] == 'M') { + if (Size >= 12 && Data[11] == ' ') { + if (Size >= 17 && Data[12] == 'W' && Data[13] == 'H' && + Data[14] == 'E' && Data[15] == 'R' && Data[16] == 'E') { + fprintf(stderr, "BINGO; Found the target, exiting.\n"); + exit(1); + } + } + } + } + } + return 0; +} diff --git a/compiler-rt/test/fuzzer/keep-seed.test b/compiler-rt/test/fuzzer/keep-seed.test new file mode 100644 --- /dev/null +++ b/compiler-rt/test/fuzzer/keep-seed.test @@ -0,0 +1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments + +RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: rm -rf %t-corpus-baseline +RUN: mkdir %t-corpus-baseline +RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments + +# The following checks whether without -keep_seed=1 libFuzzer does not find the +# crashing input "SELECT FROM WHERE" even with 2x more runs. +RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1