Index: compiler-rt/lib/fuzzer/FuzzerCorpus.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -33,17 +33,109 @@ // Stats. size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; + size_t TotalFuzzTime = 0; // in microseconds bool MayDeleteFile = false; bool Reduced = false; bool HasFocusFunction = false; Vector UniqFeatureSet; Vector DataFlowTraceForFocusFunction; + // Power schedule. + bool NeedsUpdate = false; + double Energy = 0.0; + size_t SumIncidence = 0; + Vector> FeatureFreqs; + + // Delete feature Idx and its frequency from FeatureFreqs. + bool DeleteFeatureFreq(uint32_t Idx) { + if (FeatureFreqs.empty()) + return false; + + // Binary search over local feature frequencies sorted by index. + auto lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair(Idx, 0)); + + if (lower != FeatureFreqs.end() && lower->first == Idx) { + FeatureFreqs.erase(lower); + return true; + } + return false; + } + + // Assign more energy to a high-entropy seed, i.e., that reveals more + // information about the globally rare features in the neighborhood + // of the seed. Since we do not know the entropy of a seed that has + // never been executed we assign fresh seeds maximum entropy and + // let II->Energy approach the true entropy from above. + void UpdateEnergy(size_t GlobalNumberOfFeatures) { + long double PreciseEnergy = 0.0L; + SumIncidence = 0; + + // Apply add-one smoothing to locally discovered features. + for (auto F : FeatureFreqs) { + size_t LocalIncidence = F.second + 1; + PreciseEnergy -= LocalIncidence * logl(LocalIncidence); + SumIncidence += LocalIncidence; + } + + // Apply add-one smoothing to locally undiscovered features. + // PreciseEnergy -= 0; // since logl(1.0) == 0) + SumIncidence += (GlobalNumberOfFeatures - FeatureFreqs.size()); + + // Add a single locally abundant feature apply add-one smoothing. + size_t AbdIncidence = NumExecutedMutations + 1; + PreciseEnergy -= AbdIncidence * logl(AbdIncidence); + SumIncidence += AbdIncidence; + + // Normalize. + if (SumIncidence != 0) + PreciseEnergy = (PreciseEnergy / SumIncidence) + logl(SumIncidence); + + Energy = (double)PreciseEnergy; + } + + // Increment the frequency of the feature Idx. + void UpdateFeatureFrequency(uint32_t Idx) { + NeedsUpdate = true; + + // The local feature frequencies is an ordered vector of pairs. + // If there are no local feature frequencies, push_back preserves order. + // Set the feature frequency for feature Idx32 to 1. + if (FeatureFreqs.empty()) { + FeatureFreqs.push_back(std::pair(Idx, 1)); + return; + } + + // Binary search over local feature frequencies sorted by index. + auto lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair(Idx, 0)); + + // If feature Idx32 already exists, increment its frequency. + // Otherwise, insert a new pair right after the next lower index. + if (lower != FeatureFreqs.end() && lower->first == Idx) { + lower->second++; + } else { + FeatureFreqs.insert(lower, std::pair(Idx, 1)); + } + } }; class InputCorpus { - static const size_t kFeatureSetSize = 1 << 21; - public: - InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { + static const uint32_t kFeatureSetSize = 1 << 21; + static const uint8_t kMaxMutationFactor = 20; + static const size_t kSparseEnergyUpdates = 100; + + size_t NumExecutedMutations = 0; + + // Set in constructor + bool Entropic; + size_t ConsideredRare; + size_t TopXRarestFeatures; + +public: + InputCorpus(const std::string &OutputCorpus, bool Entropic, + size_t ConsideredRare, size_t TopXRarestFeatures) + : Entropic(Entropic), ConsideredRare(ConsideredRare), + TopXRarestFeatures(TopXRarestFeatures), OutputCorpus(OutputCorpus) { memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); } @@ -70,6 +162,7 @@ Res = std::max(Res, II->U.size()); return Res; } + void IncrementNumExecutedMutations() { NumExecutedMutations++; } size_t NumInputsThatTouchFocusFunction() { return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { @@ -99,6 +192,10 @@ II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; + // Assign maximal energy to the new seed. + II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size()); + II.SumIncidence = RareFeatures.size(); + II.NeedsUpdate = false; std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); ComputeSHA1(U.data(), U.size(), II.Sha1); auto Sha1Str = Sha1ToString(II.Sha1); @@ -111,7 +208,7 @@ // But if we don't, we'll use the DFT of its base input. if (II.DataFlowTraceForFocusFunction.empty() && BaseII) II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; PrintCorpus(); // ValidateFeatureSet(); return &II; @@ -162,12 +259,13 @@ Hashes.insert(Sha1ToString(II->Sha1)); II->U = U; II->Reduced = true; - UpdateCorpusDistribution(); + DistributionNeedsUpdate = true; } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } bool HasUnit(const std::string &H) { return Hashes.count(H); } InputInfo &ChooseUnitToMutate(Random &Rand) { + UpdateCorpusDistribution(Rand); InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; assert(!II.U.empty()); return II; @@ -210,10 +308,65 @@ InputInfo &II = *Inputs[Idx]; DeleteFile(II); Unit().swap(II.U); + II.Energy = 0.0; + II.NeedsUpdate = false; + DistributionNeedsUpdate = true; if (FeatureDebug) Printf("EVICTED %zd\n", Idx); } + void AddRareFeature(uint32_t Idx) { + // Maintain *at least* TopXRarestFeatures many rare features + // and all features with a frequency below ConsideredRare. + // Remove all other features. + while (RareFeatures.size() > TopXRarestFeatures && + FreqOfMostAbundantRareFeature > ConsideredRare) { + + // Find most and second most abbundant feature. + uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], + RareFeatures[0]}; + size_t Delete = 0; + for (size_t i = 0; i < RareFeatures.size(); i++) { + uint32_t Idx2 = RareFeatures[i]; + if (GlobalFeatureFreqs[Idx2] >= + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { + MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; + MostAbundantRareFeatureIndices[0] = Idx2; + Delete = i; + } + } + + // Remove most abundant rare feature. + RareFeatures[Delete] = RareFeatures.back(); + RareFeatures.pop_back(); + + for (auto II : Inputs) { + if (II->DeleteFeatureFreq(MostAbundantRareFeatureIndices[0])) + II->NeedsUpdate = true; + } + + // Set 2nd most abundant as the new most abundant feature count. + FreqOfMostAbundantRareFeature = + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; + } + + // Add rare feature, handle collisions, and update energy. + RareFeatures.push_back(Idx); + GlobalFeatureFreqs[Idx] = 0; + for (auto II : Inputs) { + II->DeleteFeatureFreq(Idx); + + // Apply add-one smoothing to this locally undiscovered feature. + // Zero energy seeds will never be fuzzed and remain zero energy. + if (II->Energy > 0.0) { + II->SumIncidence += 1; + II->Energy += logl(II->SumIncidence) / II->SumIncidence; + } + } + + DistributionNeedsUpdate = true; + } + bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { assert(NewSize); Idx = Idx % kFeatureSetSize; @@ -228,6 +381,8 @@ DeleteInput(OldIdx); } else { NumAddedFeatures++; + if (Entropic) + AddRareFeature((uint32_t)Idx); } NumUpdatedFeatures++; if (FeatureDebug) @@ -239,6 +394,30 @@ return false; } + // Increment frequency of feature Idx globally and locally. + void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { + uint32_t Idx32 = Idx % kFeatureSetSize; + + // Saturated increment. + if (GlobalFeatureFreqs[Idx32] == 0xFFFF) + return; + uint16_t Freq = GlobalFeatureFreqs[Idx32]++; + + // Skip if abundant. + if (Freq > FreqOfMostAbundantRareFeature || + std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) == + RareFeatures.end()) + return; + + // Update global frequencies. + if (Freq == FreqOfMostAbundantRareFeature) + FreqOfMostAbundantRareFeature++; + + // Update local frequencies. + if (II) + II->UpdateFeatureFrequency(Idx32); + } + size_t NumFeatures() const { return NumAddedFeatures; } size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } @@ -265,19 +444,94 @@ // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. // - // Hypothesis: units added to the corpus last are more interesting. - // - // Hypothesis: inputs with infrequent features are more interesting. - void UpdateCorpusDistribution() { + // Hypothesis: inputs that maximize information about globally rare features + // are interesting. + void UpdateCorpusDistribution(Random &Rand) { + // Skip update if no seeds or rare features were added/deleted. + // Sparse updates for local change of feature frequencies, + // i.e., randomly do not skip. + if (!DistributionNeedsUpdate && (!Entropic || Rand(kSparseEnergyUpdates))) + return; + + DistributionNeedsUpdate = false; + size_t N = Inputs.size(); assert(N); Intervals.resize(N + 1); Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures - ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) - : 0.; + + bool VanillaSchedule = true; + if (Entropic) { + for (auto II : Inputs) { + if (II->NeedsUpdate && II->Energy != 0.0) { + II->NeedsUpdate = false; + II->UpdateEnergy(RareFeatures.size()); + } + } + + for (size_t i = 0; i < N; i++) { + + if (Inputs[i]->NumFeatures == 0) { + // If the seed doesn't represent any features, assign zero energy. + Weights[i] = 0.; + } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > + NumExecutedMutations / Inputs.size()) { + // If the seed was fuzzed a lot more than average, assign zero energy. + Weights[i] = 0.; + } else { + // Otherwise, simply assign the computed energy. + Weights[i] = Inputs[i]->Energy; + } + + // If energy for all seeds is zero, fall back to vanilla schedule. + if (Weights[i] > 0.0) + VanillaSchedule = false; + } + } + + if (VanillaSchedule) { + for (size_t i = 0; i < N; i++) + Weights[i] = Inputs[i]->NumFeatures + ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) + : 0.; + } + + if (Entropic) { + // Prefer fast seeds + size_t AvgFuzzTime = 0; + size_t Count = 0; + for (auto II : Inputs) { + if (II->NumExecutedMutations > 0) { + Count++; + AvgFuzzTime += II->TotalFuzzTime / II->NumExecutedMutations; + } + } + if (Count > 0) + AvgFuzzTime /= Count; + + for (size_t i = 0; i < N; i++) { + if (Inputs[i]->NumExecutedMutations > 0) { + size_t FuzzTime = + Inputs[i]->TotalFuzzTime / Inputs[i]->NumExecutedMutations; + if (FuzzTime * 0.1 > AvgFuzzTime) + Weights[i] *= 0.1; + else if (FuzzTime * 0.25 > AvgFuzzTime) + Weights[i] *= 0.25; + else if (FuzzTime * 0.5 > AvgFuzzTime) + Weights[i] *= 0.5; + else if (FuzzTime * 0.75 > AvgFuzzTime) + Weights[i] *= 0.75; + else if (FuzzTime * 4 < AvgFuzzTime) + Weights[i] *= 3.0; + else if (FuzzTime * 3 < AvgFuzzTime) + Weights[i] *= 2.0; + else if (FuzzTime * 2 < AvgFuzzTime) + Weights[i] *= 1.5; + } else + Weights[i] *= 3.0; + } + } if (FeatureDebug) { for (size_t i = 0; i < N; i++) Printf("%zd ", Inputs[i]->NumFeatures); @@ -302,6 +556,11 @@ uint32_t InputSizesPerFeature[kFeatureSetSize]; uint32_t SmallestElementPerFeature[kFeatureSetSize]; + bool DistributionNeedsUpdate = true; + uint16_t FreqOfMostAbundantRareFeature = 0; + uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; + Vector RareFeatures; + std::string OutputCorpus; }; Index: compiler-rt/lib/fuzzer/FuzzerDriver.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -708,6 +708,18 @@ Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) Options.StopFile = Flags.stop_file; + Options.Entropic = Flags.entropic; + Options.ConsideredRare = (size_t)Flags.considered_rare; + Options.TopXRarestFeatures = (size_t)Flags.topX_rarest_features; + if (Options.Entropic) { + if (!Options.FocusFunction.empty()) { + Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " + "be used together.\n"); + exit(1); + } + Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", + Options.ConsideredRare, Options.TopXRarestFeatures); + } unsigned Seed = Flags.seed; // Initialize Seed. @@ -728,7 +740,9 @@ Random Rand(Seed); auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus); + auto *Corpus = + new InputCorpus(Options.OutputCorpus, Options.Entropic, + Options.ConsideredRare, Options.TopXRarestFeatures); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); for (auto &U: Dictionary) Index: compiler-rt/lib/fuzzer/FuzzerFlags.def =================================================================== --- compiler-rt/lib/fuzzer/FuzzerFlags.def +++ compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -153,6 +153,13 @@ "Fuzzing will focus on inputs that trigger calls to this function. " "If -focus_function=auto and -data_flow_trace is used, libFuzzer " "will choose the focus functions automatically.") +FUZZER_FLAG_INT(entropic, 0, "Experimental. Enables entropic power schedule.") +FUZZER_FLAG_INT(considered_rare, 0xFF, "Experimental. If entropic is enabled, " + "all features which are observed less often than the specified value " + "are considered as rare.") +FUZZER_FLAG_INT(topX_rarest_features, 100, "Experimental. If entropic is " + "enabled, we keep track of the frequencies only for the Top-X least " + "abundant features (union features that are considered as rare).") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) Index: compiler-rt/lib/fuzzer/FuzzerLoop.cpp =================================================================== --- compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #if defined(__has_include) #if __has_include() @@ -475,6 +476,8 @@ TPC.CollectFeatures([&](size_t Feature) { if (Corpus.AddFeature(Feature, Size, Options.Shrink)) UniqFeatureSetTmp.push_back(Feature); + if (Options.Entropic) + Corpus.UpdateFeatureFrequency(II, Feature); if (Options.ReduceInputs && II) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) @@ -676,6 +679,11 @@ Min(MaxMutationLen, Max(U.size(), TmpMaxMutationLen)); assert(CurrentMaxMutationLen > 0); + struct timeval TimeVal; + gettimeofday(&TimeVal, NULL); + + size_t StartFuzzingII = (TimeVal.tv_sec * 1000000ULL) + TimeVal.tv_usec; + for (int i = 0; i < Options.MutateDepth; i++) { if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) break; @@ -693,6 +701,7 @@ assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); Size = NewSize; II.NumExecutedMutations++; + Corpus.IncrementNumExecutedMutations(); bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, @@ -706,6 +715,11 @@ if (Options.ReduceDepth && !FoundUniqFeatures) break; } + + gettimeofday(&TimeVal, NULL); + size_t StopFuzzingII = (TimeVal.tv_sec * 1000000ULL) + TimeVal.tv_usec; + II.TotalFuzzTime += StopFuzzingII - StartFuzzingII; + II.NeedsUpdate = true; } void Fuzzer::PurgeAllocator() { Index: compiler-rt/lib/fuzzer/FuzzerOptions.h =================================================================== --- compiler-rt/lib/fuzzer/FuzzerOptions.h +++ compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -44,6 +44,9 @@ size_t MaxNumberOfRuns = -1L; int ReportSlowUnits = 10; bool OnlyASCII = false; + bool Entropic = false; + size_t ConsideredRare = 0xFF; + size_t TopXRarestFeatures = 100; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; Index: compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp =================================================================== --- compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -1050,6 +1050,69 @@ EXPECT_EQ(CmdLine, makeCmdLine("", ">thud 2>&1")); } +TEST(Entropic, UpdateFrequency) { + const size_t One = 1, Two = 2; + const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; + size_t Index; + // Create input corpus with default entropic configuration + std::unique_ptr C(new InputCorpus("", true, 0xFF, 100)); + InputInfo *II = new InputInfo(); + + C->AddRareFeature(FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx1); + EXPECT_EQ(II->FeatureFreqs.size(), One); + C->AddRareFeature(FeatIdx2); + C->UpdateFeatureFrequency(II, FeatIdx1); + C->UpdateFeatureFrequency(II, FeatIdx2); + EXPECT_EQ(II->FeatureFreqs.size(), Two); + EXPECT_EQ(II->FeatureFreqs[0].second, 2); + EXPECT_EQ(II->FeatureFreqs[1].second, 1); + + C->AddRareFeature(FeatIdx3); + C->AddRareFeature(FeatIdx4); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx3); + C->UpdateFeatureFrequency(II, FeatIdx4); + + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); + + II->DeleteFeatureFreq(FeatIdx3); + for (Index = 1; Index < II->FeatureFreqs.size(); Index++) + EXPECT_LT(II->FeatureFreqs[Index - 1].first, II->FeatureFreqs[Index].first); +} + +long double SubAndSquare(long double X, long double Y) { + long double R = X - Y; + R = R * R; + return R; +} + +TEST(Entropic, ComputeEnergy) { + const long double Precision = 0.01; + std::unique_ptr C(new InputCorpus("", true, 0xFF, 100)); + InputInfo *II = new InputInfo(); + Vector> FeatureFreqs = { + std::pair(1, 3), + std::pair(2, 3), + std::pair(3, 3)}; + II->FeatureFreqs = FeatureFreqs; + II->NumExecutedMutations = 0; + II->UpdateEnergy(4); + EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision); + + II->NumExecutedMutations = 9; + II->UpdateEnergy(5); + EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision); + + II->FeatureFreqs[0].second++; + II->FeatureFreqs.push_back(std::pair(42, 6)); + II->NumExecutedMutations = 20; + II->UpdateEnergy(10); + EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); +} + int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS();