Index: compiler-rt/lib/fuzzer/FuzzerCorpus.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -38,12 +38,29 @@ bool HasFocusFunction = false; Vector UniqFeatureSet; Vector DataFlowTraceForFocusFunction; + // Power schedule. + bool NeedsUpdate = false; + double Energy = 0.0; + size_t SumIncidence = 0; + Vector> FeatureFreqs; }; class InputCorpus { - static const size_t kFeatureSetSize = 1 << 21; - public: - InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { + static const uint32_t kFeatureSetSize = 1 << 21; + static const uint8_t kMaxMutationFactor = 20; + static const uint8_t kProbAgressiveSchedule = 80; + static const size_t kSparseEnergyUpdates = 10000; + + bool Entropic; + size_t ConsideredRare; + size_t TopXRarestFeatures; + +public: + size_t NumExecutedMutations = 0; + InputCorpus(const std::string &OutputCorpus, bool Entropic, + size_t ConsideredRare, size_t TopXRarestFeatures) + : Entropic(Entropic), ConsideredRare(ConsideredRare), + TopXRarestFeatures(TopXRarestFeatures), OutputCorpus(OutputCorpus) { memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); } @@ -99,6 +116,10 @@ II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; + // Assign maximal energy to the new seed. + II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size()); + II.SumIncidence = RareFeatures.size(); + II.NeedsUpdate = false; std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); ComputeSHA1(U.data(), U.size(), II.Sha1); auto Sha1Str = Sha1ToString(II.Sha1); @@ -111,7 +132,7 @@ // But if we don't, we'll use the DFT of its base input. if (II.DataFlowTraceForFocusFunction.empty() && BaseII) II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; PrintCorpus(); // ValidateFeatureSet(); return &II; @@ -162,12 +183,13 @@ Hashes.insert(Sha1ToString(II->Sha1)); II->U = U; II->Reduced = true; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } bool HasUnit(const std::string &H) { return Hashes.count(H); } InputInfo &ChooseUnitToMutate(Random &Rand) { + UpdateCorpusDistribution(Rand); InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; assert(!II.U.empty()); return II; @@ -210,10 +232,80 @@ InputInfo &II = *Inputs[Idx]; DeleteFile(II); Unit().swap(II.U); + II.Energy = 0.0; + II.NeedsUpdate = false; + DistributionNeedsUpdate = true; if (FeatureDebug) Printf("EVICTED %zd\n", Idx); } + bool DeleteFeatureFreq(InputInfo *II, uint32_t Idx) { + if (II->FeatureFreqs.empty()) + return false; + + // Binary search over local feature frequencies sorted by index. + auto lower = + std::lower_bound(II->FeatureFreqs.begin(), II->FeatureFreqs.end(), + std::pair(Idx, 0)); + + if (lower != II->FeatureFreqs.end() && lower->first == Idx) { + II->FeatureFreqs.erase(lower); + return true; + } + return false; + } + + void AddRareFeature(uint32_t Idx) { + // Maintain *at least* TopXRarestFeatures many rare features + // and all features with a frequency below ConsideredRare. + // Remove all other features. + while (RareFeatures.size() > TopXRarestFeatures && + FreqOfMostAbundantRareFeature > ConsideredRare) { + uint32_t ST_mostAbundantRareFeatureIdx = + RareFeatures[0]; // 1st most abd feature index + uint32_t ND_mostAbundantRareFeatureIdx = + RareFeatures[0]; // 2nd most abd feature index + + // Find most and second most abbundant feature + for (uint32_t Idx2 : RareFeatures) { + if (GlobalFeatureFreqs[Idx2] >= + GlobalFeatureFreqs[ST_mostAbundantRareFeatureIdx]) { + ND_mostAbundantRareFeatureIdx = ST_mostAbundantRareFeatureIdx; + ST_mostAbundantRareFeatureIdx = Idx2; + } + } + + // Remove most abundant rare feature. + RareFeatures.erase(remove(RareFeatures.begin(), RareFeatures.end(), + ST_mostAbundantRareFeatureIdx), + RareFeatures.end()); + for (auto II : Inputs) { + if (DeleteFeatureFreq(II, ST_mostAbundantRareFeatureIdx)) + II->NeedsUpdate = true; + } + + // Set 2nd most abundant as the new most abundant feature count. + FreqOfMostAbundantRareFeature = + GlobalFeatureFreqs[ND_mostAbundantRareFeatureIdx]; + } + + // Add rare feature, handle collisions, and update energy. + RareFeatures.push_back(Idx); + GlobalFeatureFreqs[Idx] = 0; + for (auto II : Inputs) { + DeleteFeatureFreq(II, Idx); + + // Apply add-one smoothing to this locally undiscovered feature. + // Zero energy seeds will never be fuzzed and remain zero energy. + if (II->Energy > 0.0) { + II->SumIncidence += 1; + II->Energy += logl(II->SumIncidence) / II->SumIncidence; + } + } + + DistributionNeedsUpdate = true; + } + bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { assert(NewSize); Idx = Idx % kFeatureSetSize; @@ -228,6 +320,8 @@ DeleteInput(OldIdx); } else { NumAddedFeatures++; + if (Entropic) + AddRareFeature((uint32_t)Idx); } NumUpdatedFeatures++; if (FeatureDebug) @@ -239,6 +333,88 @@ return false; } + void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { + uint32_t Idx32 = Idx % kFeatureSetSize; + + // Saturated increment. + if (GlobalFeatureFreqs[Idx32] == 0xFFFF) + return; + uint16_t Freq = GlobalFeatureFreqs[Idx32]++; + + // Skip if abundant. + if (Freq > FreqOfMostAbundantRareFeature || + std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) == + RareFeatures.end()) + return; + + // Update global frequencies. + if (Freq == FreqOfMostAbundantRareFeature) + FreqOfMostAbundantRareFeature++; + + // Update local frequencies. + if (!II) + return; + + // The local feature frequencies is an ordered vector of pairs. + // If there are no local feature frequencies, push_back preserves order. + // Set the feature frequency for feature Idx32 to 1. + if (II->FeatureFreqs.empty()) { + II->FeatureFreqs.push_back(std::pair(Idx32, 1)); + II->NeedsUpdate = true; + return; + } + + // Binary search over local feature frequencies sorted by index. + auto lower = + std::lower_bound(II->FeatureFreqs.begin(), II->FeatureFreqs.end(), + std::pair(Idx32, 0)); + + // If feature Idx32 already exists, increment its frequency. + // Otherwise, insert a new pair right after the next lower index. + if (lower != II->FeatureFreqs.end() && lower->first == Idx32) { + lower->second++; + } else { + II->FeatureFreqs.insert(lower, std::pair(Idx32, 1)); + } + II->NeedsUpdate = true; + } + + // Assign more energy to a high-entropy seed, i.e., that reveals more + // information about the globally rare features in the neighborhood + // of the seed. Since we do not know the entropy of a seed that has + // never been executed we assign fresh seeds maximum entropy and + // let II->Energy approach the true entropy from above. + void UpdateEnergy(InputInfo *II, size_t GlobalNumberOfFeatures) { + long double Energy = 0.0L; + size_t SumIncidence = 0; + + II->Energy = 0.0; + II->SumIncidence = 0; + + // Apply add-one smoothing to locally discovered features. + for (auto F : II->FeatureFreqs) { + size_t LocalIncidence = F.second + 1; + Energy -= LocalIncidence * logl(LocalIncidence); + SumIncidence += LocalIncidence; + } + + // Apply add-one smoothing to locally undiscovered features. + // Energy -= 0; // since logl(1.0) == 0) + SumIncidence += (GlobalNumberOfFeatures - II->FeatureFreqs.size()); + + // Add a single locally abundant feature apply add-one smoothing. + size_t AbdIncidence = II->NumExecutedMutations + 1; + Energy -= AbdIncidence * logl(AbdIncidence); + SumIncidence += AbdIncidence; + + // Normalize. + if (SumIncidence != 0) + Energy = (Energy / SumIncidence) + logl(SumIncidence); + + II->Energy = (double)Energy; + II->SumIncidence = SumIncidence; + } + size_t NumFeatures() const { return NumAddedFeatures; } size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } @@ -265,19 +441,74 @@ // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. // - // Hypothesis: units added to the corpus last are more interesting. - // - // Hypothesis: inputs with infrequent features are more interesting. - void UpdateCorpusDistribution() { + // Hypothesis: inputs that maximize information about globally rare features + // are interesting. + void UpdateCorpusDistribution(Random &Rand) { + // Skip update if no seeds or rare features were added/deleted. + // Sparse updates for local change of feature frequencies, + // i.e., randomly do not skip. + if (!DistributionNeedsUpdate && (!Entropic || Rand(kSparseEnergyUpdates))) + return; + + DistributionNeedsUpdate = false; + size_t N = Inputs.size(); assert(N); Intervals.resize(N + 1); Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures - ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) - : 0.; + + bool VanillaSchedule = true; + if (Entropic) { + // If aggressive, assign zero energy to seeds with below average entropy + // and normalize. + bool AggressiveSchedule = Rand(100) < kProbAgressiveSchedule; + + double AvgEnergy = 0; + for (auto II : Inputs) { + if (II->NeedsUpdate && II->Energy != 0.0) { + UpdateEnergy(II, RareFeatures.size()); + II->NeedsUpdate = false; + } + AvgEnergy += II->Energy * (long double)II->NumExecutedMutations / + NumExecutedMutations; + } + + for (size_t i = 0; i < N; i++) { + + if (Inputs[i]->NumFeatures == 0) { + // If the seed doesn't represent any features, assign zero energy. + Weights[i] = 0.; + } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > + NumExecutedMutations / Inputs.size()) { + // If the seed was fuzzed a lot more than average, assign zero energy. + Weights[i] = 0.; + } else if (AggressiveSchedule) { + if (Inputs[i]->Energy < AvgEnergy) { + // If the seed has below avarage entropy, assign zero energy. + Weights[i] = 0.; + } else { + // Otherwise subtract the average entropy to normalize. + Weights[i] = Inputs[i]->Energy - AvgEnergy; + } + } else { + // If not aggressive, simply assign the computed energy. + Weights[i] = Inputs[i]->Energy; + } + + // If energy for all seeds is zero, fall back to vanilla schedule. + if (Weights[i] > 0.0) + VanillaSchedule = false; + } + } + + if (VanillaSchedule) { + for (size_t i = 0; i < N; i++) + Weights[i] = Inputs[i]->NumFeatures + ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) + : 0.; + } + if (FeatureDebug) { for (size_t i = 0; i < N; i++) Printf("%zd ", Inputs[i]->NumFeatures); @@ -302,6 +533,11 @@ uint32_t InputSizesPerFeature[kFeatureSetSize]; uint32_t SmallestElementPerFeature[kFeatureSetSize]; + bool DistributionNeedsUpdate = true; + uint16_t FreqOfMostAbundantRareFeature = 0; + uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; + Vector RareFeatures; + std::string OutputCorpus; }; Index: compiler-rt/lib/fuzzer/FuzzerDriver.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -708,6 +708,18 @@ Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) Options.StopFile = Flags.stop_file; + Options.Entropic = Flags.entropic; + Options.ConsideredRare = (size_t)Flags.considered_rare; + Options.TopXRarestFeatures = (size_t)Flags.topX_rarest_features; + if (Options.Entropic) { + if (!Options.FocusFunction.empty()) { + Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " + "be used together.\n"); + exit(1); + } + Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", + Options.ConsideredRare, Options.TopXRarestFeatures); + } unsigned Seed = Flags.seed; // Initialize Seed. @@ -728,7 +740,9 @@ Random Rand(Seed); auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus); + auto *Corpus = + new InputCorpus(Options.OutputCorpus, Options.Entropic, + Options.ConsideredRare, Options.TopXRarestFeatures); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); for (auto &U: Dictionary) Index: compiler-rt/lib/fuzzer/FuzzerFlags.def =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFlags.def +++ compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -153,6 +153,13 @@ "Fuzzing will focus on inputs that trigger calls to this function. " "If -focus_function=auto and -data_flow_trace is used, libFuzzer " "will choose the focus functions automatically.") +FUZZER_FLAG_INT(entropic, 0, "Experimental. Enables entropic power schedule.") +FUZZER_FLAG_INT(considered_rare, 0xFF, "Experimental. If entropic is enabled, " + "all features which are observed less often than the specified value " + "are considered as rare.") +FUZZER_FLAG_INT(topX_rarest_features, 100, "Experimental. If entropic is " + "enabled, we keep track of the frequencies only for the Top-X least " + "abundant features (union features that are considered as rare).") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) Index: compiler-rt/lib/fuzzer/FuzzerLoop.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -475,6 +475,8 @@ TPC.CollectFeatures([&](size_t Feature) { if (Corpus.AddFeature(Feature, Size, Options.Shrink)) UniqFeatureSetTmp.push_back(Feature); + if (Options.Entropic) + Corpus.UpdateFeatureFrequency(II, Feature); if (Options.ReduceInputs && II) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) @@ -693,6 +695,7 @@ assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); Size = NewSize; II.NumExecutedMutations++; + Corpus.NumExecutedMutations++; bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, @@ -706,6 +709,7 @@ if (Options.ReduceDepth && !FoundUniqFeatures) break; } + II.NeedsUpdate = true; } void Fuzzer::PurgeAllocator() { Index: compiler-rt/lib/fuzzer/FuzzerOptions.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerOptions.h +++ compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -44,6 +44,9 @@ size_t MaxNumberOfRuns = -1L; int ReportSlowUnits = 10; bool OnlyASCII = false; + bool Entropic = false; + size_t ConsideredRare = 0xFF; + size_t TopXRarestFeatures = 100; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; Index: compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp =================================================================== --- compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -592,7 +592,7 @@ TEST(Corpus, Distribution) { DataFlowTrace DFT; Random Rand(0); - std::unique_ptr C(new InputCorpus("")); + std::unique_ptr C(new InputCorpus("", false, 0, 0)); size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) @@ -1050,6 +1050,69 @@ EXPECT_EQ(CmdLine, makeCmdLine("", ">thud 2>&1")); } +TEST(Entropic, UpdateFrequency) { + const size_t One = 1, Two = 2; + const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; + size_t Index; + // Create input corpus with default entropic configuration + std::unique_ptr C(new InputCorpus("", true, 0xFF, 100)); + InputInfo *II = new InputInfo(); + + C->AddRareFeature(FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx1); + EXPECT_EQ(II->FeatureFreqs.size(), One); + C->AddRareFeature(FeatIdx2); + C->UpdateFeatureFrequency(II, FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx2); + EXPECT_EQ(II->FeatureFreqs.size(), Two); + EXPECT_EQ(II->FeatureFreqs[0].second, 2); + EXPECT_EQ(II->FeatureFreqs[1].second, 1); + + C->AddRareFeature(FeatIdx3); + C->AddRareFeature(FeatIdx4); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx4); + + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); + + C->DeleteFeatureFreq(II, FeatIdx3); + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); +} + +long double SubAndSquare(long double X, long double Y) { + long double R = X - Y; + R = R * R; + return R; +} + +TEST(Entropic, ComputeEnergy) { + const long double Precision = 0.01; + std::unique_ptr C(new InputCorpus("", true, 0xFF, 100)); + InputInfo *II = new InputInfo(); + Vector> FeatureFreqs = { + std::pair(1, 3), + std::pair(2, 3), + std::pair(3, 3)}; + II->FeatureFreqs = FeatureFreqs; + II->NumExecutedMutations = 0; + C->UpdateEnergy(II, 4); + EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision); + + II->NumExecutedMutations = 9; + C->UpdateEnergy(II, 5); + EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision); + + II->FeatureFreqs[0].second++; + II->FeatureFreqs.push_back(std::pair(42, 6)); + II->NumExecutedMutations = 20; + C->UpdateEnergy(II, 10); + EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); +} + int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS();