Index: lib/Fuzzer/FuzzerDriver.cpp
===================================================================
--- lib/Fuzzer/FuzzerDriver.cpp
+++ lib/Fuzzer/FuzzerDriver.cpp
@@ -355,6 +355,108 @@
   return 0;
 }
 
+/// Scores every dictionary entry against the corpus and prints the entries
+/// that look useless.
+///
+/// Each corpus unit is first run unmodified to record the features reported by
+/// TPC.CollectFeatures. Then, for every dictionary entry found in the unit, the
+/// first occurrence is masked (every byte XOR-ed with 0xFF), the unit is run
+/// again and the two feature sets are compared: a different set increments the
+/// entry's score, an identical set decrements it. Entries that never occur in
+/// the corpus keep a score of 0.
+///
+/// \param F      Fuzzer used to run the units.
+/// \param Dict   Dictionary entries to evaluate.
+/// \param Corpus Corpus units. A unit is masked in place while it is probed and
+///               restored before the next probe.
+/// \returns 0. The caller treats a non-zero result as failure.
+int MinimizeDictionary(Fuzzer *F, const std::vector<Unit>& Dict,
+                       UnitVector& Corpus) {
+  std::size_t NumberOfTests = Dict.size() * Corpus.size() * 2;
+  Printf("Started dictionary minimization.");
+  Printf(" Need to perform %zd tests.\n", NumberOfTests);
+
+  // NOTE: F->Corpus.ResetFeatureSet() is private, so it is not called here.
+  F->ResetCoverage();
+  TPC.ResetMaps();
+
+  // Runs one unit, prints a debug summary of the run, resets the coverage
+  // state for the next run and returns the collected feature set.
+  auto RunAndCollect = [F](const Unit &Data, const char *UnitLabel,
+                           const char *ResultLabel) {
+    F->RunOne(Data.data(), Data.size());
+    auto Coverage = TPC.GetTotalPCCoverage();
+    std::set<size_t> Features;
+    auto Result = TPC.CollectFeatures([&](size_t Feature) -> bool {
+      Features.insert(Feature);
+      return true;
+    });
+
+    Printf("%s", UnitLabel);
+    PrintASCII(Data.data(), Data.size(), "\n");
+    Printf("%s", ResultLabel);
+    Printf("%zd, Features:", Result);
+    for (size_t Id : Features)
+      Printf(" %zd", Id);
+    Printf(", Coverage: %zd\n", Coverage);
+
+    TPC.ResetMaps();
+    F->ResetCoverage();
+    return Features;
+  };
+
+  // Scores for every dictionary unit.
+  std::vector<int> Scores(Dict.size());
+  for (auto &Data : Corpus) {
+    // Baseline: features of the corpus unit without modifications.
+    const std::set<size_t> InitialFeatures =
+        RunAndCollect(Data, "Initial corpus unit: ", "Initial Result: ");
+
+    for (std::size_t i = 0; i != Dict.size(); ++i) {
+      const Unit &Word = Dict[i];
+      auto Pos = std::search(Data.begin(), Data.end(),
+                             Word.begin(), Word.end());
+      if (Pos == Data.end())
+        continue;
+
+      Printf("\nDictionary unit: ");
+      PrintASCII(Word.data(), Word.size(), "\n");
+
+      // Replace dictionary unit in the data by its masked value.
+      const auto End = Pos + Word.size();
+      for (auto It = Pos; It != End; ++It)
+        *It ^= 0xFF;
+
+      const std::set<size_t> ModifiedFeatures =
+          RunAndCollect(Data, "Modified corpus unit: ", "Modified Result: ");
+
+      // Restore initial contents of the data (XOR is its own inverse).
+      for (auto It = Pos; It != End; ++It)
+        *It ^= 0xFF;
+
+      if (InitialFeatures == ModifiedFeatures)
+        --Scores[i];
+      else
+        ++Scores[i];
+    }
+    Printf("\n");
+  }
+
+  Printf("###### Useless dictionary elements. ######\n");
+  for (std::size_t i = 0; i != Dict.size(); ++i) {
+    // A positive score means that masking the entry changed the feature set
+    // more often than not, so the entry is kept out of the list.
+    if (Scores[i] > 0)
+      continue;
+
+    Printf("\"");
+    PrintASCII(Dict[i].data(), Dict[i].size(), "\"");
+    Printf(" # Score: %d\n", Scores[i]);
+  }
+  Printf("###### End of useless dictionary elements. ######\n");
+  return 0;
+}
+
 int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
   using namespace fuzzer;
   assert(argc && argv && "Argument pointers cannot be nullptr");
@@ -550,6 +652,19 @@
                            TemporaryMaxLen, /*ExitOnError=*/false);
   }
 
+  if (Flags.minimize_dict) {
+    if (Dictionary.empty() || Inputs->empty()) {
+      Printf("ERROR: can't minimize dict without dict and corpus provided\n");
+      return 1;
+    }
+    if (MinimizeDictionary(F, Dictionary, InitialCorpus)) {
+      Printf("Dictionary minimization failed\n");
+      exit(1);
+    }
+    Printf("Dictionary minimization succeeded\n");
+    exit(0);
+  }
+
   if (Options.MaxLen == 0) {
     size_t MaxLen = 0;
     for (auto &U : InitialCorpus)
Index: lib/Fuzzer/FuzzerFlags.def
===================================================================
--- lib/Fuzzer/FuzzerFlags.def
+++ lib/Fuzzer/FuzzerFlags.def
@@ -39,6 +39,7 @@
   "merged into the 1-st corpus. Only interesting units will be taken. "
   "This flag can be used to minimize a corpus.")
 FUZZER_FLAG_STRING(merge_control_file, "internal flag")
+FUZZER_FLAG_INT(minimize_dict, 0, "internal flag")
 FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided"
   " crash input. Use with -runs=N or -max_total_time=N to limit "
   "the number attempts")