diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -38,12 +38,102 @@ bool HasFocusFunction = false; Vector UniqFeatureSet; Vector DataFlowTraceForFocusFunction; + // Power schedule. + bool NeedsEnergyUpdate = false; + double Energy = 0.0; + size_t SumIncidence = 0; + Vector> FeatureFreqs; + + // Delete feature Idx and its frequency from FeatureFreqs. + bool DeleteFeatureFreq(uint32_t Idx) { + if (FeatureFreqs.empty()) + return false; + + // Binary search over local feature frequencies sorted by index. + auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair(Idx, 0)); + + if (Lower != FeatureFreqs.end() && Lower->first == Idx) { + FeatureFreqs.erase(Lower); + return true; + } + return false; + } + + // Assign more energy to a high-entropy seed, i.e., that reveals more + // information about the globally rare features in the neighborhood + // of the seed. Since we do not know the entropy of a seed that has + // never been executed we assign fresh seeds maximum entropy and + // let II->Energy approach the true entropy from above. + void UpdateEnergy(size_t GlobalNumberOfFeatures) { + Energy = 0.0; + SumIncidence = 0; + + // Apply add-one smoothing to locally discovered features. + for (auto F : FeatureFreqs) { + size_t LocalIncidence = F.second + 1; + Energy -= LocalIncidence * logl(LocalIncidence); + SumIncidence += LocalIncidence; + } + + // Apply add-one smoothing to locally undiscovered features. + // PreciseEnergy -= 0; // since logl(1.0) == 0) + SumIncidence += (GlobalNumberOfFeatures - FeatureFreqs.size()); + + // Add a single locally abundant feature apply add-one smoothing. + size_t AbdIncidence = NumExecutedMutations + 1; + Energy -= AbdIncidence * logl(AbdIncidence); + SumIncidence += AbdIncidence; + + // Normalize. + if (SumIncidence != 0) + Energy = (Energy / SumIncidence) + logl(SumIncidence); + } + + // Increment the frequency of the feature Idx. + void UpdateFeatureFrequency(uint32_t Idx) { + NeedsEnergyUpdate = true; + + // The local feature frequencies is an ordered vector of pairs. + // If there are no local feature frequencies, push_back preserves order. + // Set the feature frequency for feature Idx32 to 1. + if (FeatureFreqs.empty()) { + FeatureFreqs.push_back(std::pair(Idx, 1)); + return; + } + + // Binary search over local feature frequencies sorted by index. + auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair(Idx, 0)); + + // If feature Idx32 already exists, increment its frequency. + // Otherwise, insert a new pair right after the next lower index. + if (Lower != FeatureFreqs.end() && Lower->first == Idx) { + Lower->second++; + } else { + FeatureFreqs.insert(Lower, std::pair(Idx, 1)); + } + } +}; + +struct EntropicOptions { + bool Enabled; + size_t NumberOfRarestFeatures; + size_t FeatureFrequencyThreshold; }; class InputCorpus { - static const size_t kFeatureSetSize = 1 << 21; - public: - InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { + static const uint32_t kFeatureSetSize = 1 << 21; + static const uint8_t kMaxMutationFactor = 20; + static const size_t kSparseEnergyUpdates = 100; + + size_t NumExecutedMutations = 0; + + EntropicOptions Entropic; + +public: + InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) + : Entropic(Entropic), OutputCorpus(OutputCorpus) { memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); } @@ -70,6 +160,7 @@ Res = std::max(Res, II->U.size()); return Res; } + void IncrementNumExecutedMutations() { NumExecutedMutations++; } size_t NumInputsThatTouchFocusFunction() { return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { @@ -99,6 +190,10 @@ II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; + // Assign maximal energy to the new seed. + II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size()); + II.SumIncidence = RareFeatures.size(); + II.NeedsEnergyUpdate = false; std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); ComputeSHA1(U.data(), U.size(), II.Sha1); auto Sha1Str = Sha1ToString(II.Sha1); @@ -111,7 +206,7 @@ // But if we don't, we'll use the DFT of its base input. if (II.DataFlowTraceForFocusFunction.empty() && BaseII) II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; PrintCorpus(); // ValidateFeatureSet(); return &II; @@ -162,7 +257,7 @@ Hashes.insert(Sha1ToString(II->Sha1)); II->U = U; II->Reduced = true; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } @@ -175,6 +270,7 @@ // Returns an index of random unit from the corpus to mutate. size_t ChooseUnitIdxToMutate(Random &Rand) { + UpdateCorpusDistribution(Rand); size_t Idx = static_cast(CorpusDistribution(Rand)); assert(Idx < Inputs.size()); return Idx; @@ -210,10 +306,65 @@ InputInfo &II = *Inputs[Idx]; DeleteFile(II); Unit().swap(II.U); + II.Energy = 0.0; + II.NeedsEnergyUpdate = false; + DistributionNeedsUpdate = true; if (FeatureDebug) Printf("EVICTED %zd\n", Idx); } + void AddRareFeature(uint32_t Idx) { + // Maintain *at least* TopXRarestFeatures many rare features + // and all features with a frequency below ConsideredRare. + // Remove all other features. + while (RareFeatures.size() > Entropic.NumberOfRarestFeatures && + FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) { + + // Find most and second most abbundant feature. + uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], + RareFeatures[0]}; + size_t Delete = 0; + for (size_t i = 0; i < RareFeatures.size(); i++) { + uint32_t Idx2 = RareFeatures[i]; + if (GlobalFeatureFreqs[Idx2] >= + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { + MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; + MostAbundantRareFeatureIndices[0] = Idx2; + Delete = i; + } + } + + // Remove most abundant rare feature. + RareFeatures[Delete] = RareFeatures.back(); + RareFeatures.pop_back(); + + for (auto II : Inputs) { + if (II->DeleteFeatureFreq(MostAbundantRareFeatureIndices[0])) + II->NeedsEnergyUpdate = true; + } + + // Set 2nd most abundant as the new most abundant feature count. + FreqOfMostAbundantRareFeature = + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; + } + + // Add rare feature, handle collisions, and update energy. + RareFeatures.push_back(Idx); + GlobalFeatureFreqs[Idx] = 0; + for (auto II : Inputs) { + II->DeleteFeatureFreq(Idx); + + // Apply add-one smoothing to this locally undiscovered feature. + // Zero energy seeds will never be fuzzed and remain zero energy. + if (II->Energy > 0.0) { + II->SumIncidence += 1; + II->Energy += logl(II->SumIncidence) / II->SumIncidence; + } + } + + DistributionNeedsUpdate = true; + } + bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { assert(NewSize); Idx = Idx % kFeatureSetSize; @@ -228,6 +379,8 @@ DeleteInput(OldIdx); } else { NumAddedFeatures++; + if (Entropic.Enabled) + AddRareFeature((uint32_t)Idx); } NumUpdatedFeatures++; if (FeatureDebug) @@ -239,6 +392,30 @@ return false; } + // Increment frequency of feature Idx globally and locally. + void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { + uint32_t Idx32 = Idx % kFeatureSetSize; + + // Saturated increment. + if (GlobalFeatureFreqs[Idx32] == 0xFFFF) + return; + uint16_t Freq = GlobalFeatureFreqs[Idx32]++; + + // Skip if abundant. + if (Freq > FreqOfMostAbundantRareFeature || + std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) == + RareFeatures.end()) + return; + + // Update global frequencies. + if (Freq == FreqOfMostAbundantRareFeature) + FreqOfMostAbundantRareFeature++; + + // Update local frequencies. + if (II) + II->UpdateFeatureFrequency(Idx32); + } + size_t NumFeatures() const { return NumAddedFeatures; } size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } @@ -265,19 +442,60 @@ // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. // - // Hypothesis: units added to the corpus last are more interesting. - // - // Hypothesis: inputs with infrequent features are more interesting. - void UpdateCorpusDistribution() { + // Hypothesis: inputs that maximize information about globally rare features + // are interesting. + void UpdateCorpusDistribution(Random &Rand) { + // Skip update if no seeds or rare features were added/deleted. + // Sparse updates for local change of feature frequencies, + // i.e., randomly do not skip. + if (!DistributionNeedsUpdate && + (!Entropic.Enabled || Rand(kSparseEnergyUpdates))) + return; + + DistributionNeedsUpdate = false; + size_t N = Inputs.size(); assert(N); Intervals.resize(N + 1); Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures - ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) - : 0.; + + bool VanillaSchedule = true; + if (Entropic.Enabled) { + for (auto II : Inputs) { + if (II->NeedsEnergyUpdate && II->Energy != 0.0) { + II->NeedsEnergyUpdate = false; + II->UpdateEnergy(RareFeatures.size()); + } + } + + for (size_t i = 0; i < N; i++) { + + if (Inputs[i]->NumFeatures == 0) { + // If the seed doesn't represent any features, assign zero energy. + Weights[i] = 0.; + } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > + NumExecutedMutations / Inputs.size()) { + // If the seed was fuzzed a lot more than average, assign zero energy. + Weights[i] = 0.; + } else { + // Otherwise, simply assign the computed energy. + Weights[i] = Inputs[i]->Energy; + } + + // If energy for all seeds is zero, fall back to vanilla schedule. + if (Weights[i] > 0.0) + VanillaSchedule = false; + } + } + + if (VanillaSchedule) { + for (size_t i = 0; i < N; i++) + Weights[i] = Inputs[i]->NumFeatures + ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) + : 0.; + } + if (FeatureDebug) { for (size_t i = 0; i < N; i++) Printf("%zd ", Inputs[i]->NumFeatures); @@ -302,6 +520,11 @@ uint32_t InputSizesPerFeature[kFeatureSetSize]; uint32_t SmallestElementPerFeature[kFeatureSetSize]; + bool DistributionNeedsUpdate = true; + uint16_t FreqOfMostAbundantRareFeature = 0; + uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; + Vector RareFeatures; + std::string OutputCorpus; }; diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -708,6 +708,26 @@ Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) Options.StopFile = Flags.stop_file; + Options.Entropic = Flags.entropic; + Options.EntropicFeatureFrequencyThreshold = + (size_t)Flags.entropic_feature_frequency_threshold; + Options.EntropicNumberOfRarestFeatures = + (size_t)Flags.entropic_number_of_rarest_features; + if (Options.Entropic) { + if (!Options.FocusFunction.empty()) { + Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " + "be used together.\n"); + exit(1); + } + Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", + Options.EntropicFeatureFrequencyThreshold, + Options.EntropicNumberOfRarestFeatures); + } + struct EntropicOptions Entropic; + Entropic.Enabled = Options.Entropic; + Entropic.FeatureFrequencyThreshold = + Options.EntropicFeatureFrequencyThreshold; + Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures; unsigned Seed = Flags.seed; // Initialize Seed. @@ -728,7 +748,7 @@ Random Rand(Seed); auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus); + auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); for (auto &U: Dictionary) diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -153,6 +153,14 @@ "Fuzzing will focus on inputs that trigger calls to this function. " "If -focus_function=auto and -data_flow_trace is used, libFuzzer " "will choose the focus functions automatically.") +FUZZER_FLAG_INT(entropic, 0, "Experimental. Enables entropic power schedule.") +FUZZER_FLAG_INT(entropic_feature_frequency_threshold, 0xFF, "Experimental. If " + "entropic is enabled, all features which are observed less often than " + "the specified value are considered as rare.") +FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If " + "entropic is enabled, we keep track of the frequencies only for the " + "Top-X least abundant features (union features that are considered as " + "rare).") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -475,6 +475,8 @@ TPC.CollectFeatures([&](size_t Feature) { if (Corpus.AddFeature(Feature, Size, Options.Shrink)) UniqFeatureSetTmp.push_back(Feature); + if (Options.Entropic) + Corpus.UpdateFeatureFrequency(II, Feature); if (Options.ReduceInputs && II) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) @@ -693,6 +695,7 @@ assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); Size = NewSize; II.NumExecutedMutations++; + Corpus.IncrementNumExecutedMutations(); bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, @@ -706,6 +709,8 @@ if (Options.ReduceDepth && !FoundUniqFeatures) break; } + + II.NeedsEnergyUpdate = true; } void Fuzzer::PurgeAllocator() { diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -44,6 +44,9 @@ size_t MaxNumberOfRuns = -1L; int ReportSlowUnits = 10; bool OnlyASCII = false; + bool Entropic = false; + size_t EntropicFeatureFrequencyThreshold = 0xFF; + size_t EntropicNumberOfRarestFeatures = 100; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -592,7 +592,8 @@ TEST(Corpus, Distribution) { DataFlowTrace DFT; Random Rand(0); - std::unique_ptr C(new InputCorpus("")); + struct EntropicOptions Entropic = {false, 0xFF, 100}; + std::unique_ptr C(new InputCorpus("", Entropic)); size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) @@ -1050,6 +1051,68 @@ EXPECT_EQ(CmdLine, makeCmdLine("", ">thud 2>&1")); } +TEST(Entropic, UpdateFrequency) { + const size_t One = 1, Two = 2; + const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; + size_t Index; + // Create input corpus with default entropic configuration + struct EntropicOptions Entropic = {true, 0xFF, 100}; + std::unique_ptr C(new InputCorpus("", Entropic)); + InputInfo *II = new InputInfo(); + + C->AddRareFeature(FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx1); + EXPECT_EQ(II->FeatureFreqs.size(), One); + C->AddRareFeature(FeatIdx2); + C->UpdateFeatureFrequency(II, FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx2); + EXPECT_EQ(II->FeatureFreqs.size(), Two); + EXPECT_EQ(II->FeatureFreqs[0].second, 2); + EXPECT_EQ(II->FeatureFreqs[1].second, 1); + + C->AddRareFeature(FeatIdx3); + C->AddRareFeature(FeatIdx4); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx4); + + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); + + II->DeleteFeatureFreq(FeatIdx3); + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); +} + +double SubAndSquare(double X, double Y) { + double R = X - Y; + R = R * R; + return R; +} + +TEST(Entropic, ComputeEnergy) { + const double Precision = 0.01; + struct EntropicOptions Entropic = {true, 0xFF, 100}; + std::unique_ptr C(new InputCorpus("", Entropic)); + InputInfo *II = new InputInfo(); + Vector> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}}; + II->FeatureFreqs = FeatureFreqs; + II->NumExecutedMutations = 0; + II->UpdateEnergy(4); + EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision); + + II->NumExecutedMutations = 9; + II->UpdateEnergy(5); + EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision); + + II->FeatureFreqs[0].second++; + II->FeatureFreqs.push_back(std::pair(42, 6)); + II->NumExecutedMutations = 20; + II->UpdateEnergy(10); + EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); +} + int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS();