diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
--- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h
+++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
@@ -19,8 +19,12 @@
 #include "FuzzerTracePC.h"
 #include
 #include
+#include
+#include
+#include
 #include
 #include
+#include
 #include

 namespace fuzzer {
@@ -35,6 +39,7 @@
   // Stats.
   size_t NumExecutedMutations = 0;
   size_t NumSuccessfullMutations = 0;
+  bool SlowInput = false;
   bool NeverReduce = false;
   bool MayDeleteFile = false;
   bool Reduced = false;
@@ -46,6 +51,17 @@
   double Energy = 0.0;
   double SumIncidence = 0.0;
   std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs;
+  // K-Scheduler.
+  bool ComputedBefore = false;
+  bool FuzzedBefore = false;
+  double KschedulerEnergy = 0.0;
+  uint32_t KschedulerPerfScore; // The performance score reflects the runtime
+                                // cost of the current input; used in K-Scheduler.
+  std::vector<uint32_t> EdgeSet; // The set of edges covered by current input.
+  std::vector<uint32_t>
+      BorderEdgeIdx; // Index table of BorderEdgeSet for current input.
+  std::vector<std::pair<uint32_t, uint32_t>>
+      BorderEdgeSet; // Set of border edges for current input.

   // Delete feature Idx and its frequency from FeatureFreqs.
   bool DeleteFeatureFreq(uint32_t Idx) {
@@ -63,6 +79,58 @@
     return false;
   }

+  // Every time K-Scheduler identifies a border edge for a seed, it saves the
+  // border edge (ParentId, ChildId) and the corresponding border edge index
+  // BorderEdgeId into the seed's BorderEdgeSet and BorderEdgeIdx respectively.
+  // In the subsequent seed weight computation, K-Scheduler can then retrieve
+  // the border edges directly from the saved data structure.
+  void SaveBorderEdge(uint32_t ParentId, uint32_t ChildId,
+                      uint32_t BorderEdgeId) {
+
+    if (BorderEdgeSet.empty()) {
+      BorderEdgeSet.push_back(std::pair<uint32_t, uint32_t>(ParentId, ChildId));
+      BorderEdgeIdx.push_back(BorderEdgeId);
+      return;
+    }
+
+    // Binary search for the insertion point, since the vector of pairs is
+    // kept sorted.
+    auto Lower =
+        std::lower_bound(BorderEdgeSet.begin(), BorderEdgeSet.end(),
+                         std::pair<uint32_t, uint32_t>(ParentId, ChildId));
+
+    int TmpIdx = Lower - BorderEdgeSet.begin();
+    BorderEdgeSet.insert(Lower,
+                         std::pair<uint32_t, uint32_t>(ParentId, ChildId));
+    BorderEdgeIdx.insert(BorderEdgeIdx.begin() + TmpIdx, BorderEdgeId);
+  }
+
+  // Check whether a newly visited border edge is included in the current
+  // seed's energy computation. If so, delete the border edge from the seed's
+  // data structure, since only unvisited border edges are considered in
+  // K-Scheduler's seed energy computation.
+  int SearchAndDeleteBorderEdge(uint32_t ParentId, uint32_t ChildId) {
+    // The border edge is not included in the current seed's energy.
+    if (BorderEdgeSet.empty())
+      return -2;
+
+    // Binary search.
+    auto Lower =
+        std::lower_bound(BorderEdgeSet.begin(), BorderEdgeSet.end(),
+                         std::pair<uint32_t, uint32_t>(ParentId, ChildId));
+
+    if (Lower != BorderEdgeSet.end() && Lower->first == ParentId &&
+        Lower->second == ChildId) {
+      int TmpIdx = Lower - BorderEdgeSet.begin();
+      BorderEdgeSet.erase(Lower);
+
+      int ret_idx = BorderEdgeIdx[TmpIdx];
+      BorderEdgeIdx.erase(BorderEdgeIdx.begin() + TmpIdx);
+
+      return ret_idx;
+    }
+    return -2;
+  }
+
   // Assign more energy to a high-entropy seed, i.e., that reveals more
   // information about the globally rare features in the neighborhood of the
   // seed.
Since we do not know the entropy of a seed that has never been @@ -152,25 +220,102 @@ bool ScalePerExecTime; }; +struct KschedulerOptions { + bool Enabled; + size_t MinNumMutationsForEachSeed; + size_t PerThres; +}; + class InputCorpus { static const uint32_t kFeatureSetSize = 1 << 21; + static const uint32_t kEdgeSetSize = 1 << 17; static const uint8_t kMaxMutationFactor = 20; static const size_t kSparseEnergyUpdates = 100; size_t NumExecutedMutations = 0; + // The initial number must be greater than 1 in order to avoid 0 value + // when computing log(CurNumExecutedMutations) for the first time. + // Hence, we use a small number (i.e., 2) here. + size_t CurNumExecutedMutations = 2; EntropicOptions Entropic; + KschedulerOptions Kscheduler; public: - InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) - : Entropic(Entropic), OutputCorpus(OutputCorpus) { + InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic, + KschedulerOptions Kscheduler) + : Entropic(Entropic), Kscheduler(Kscheduler), OutputCorpus(OutputCorpus) { memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); + if (Kscheduler.Enabled) + LoadGraphInfo(); } ~InputCorpus() { for (auto II : Inputs) delete II; } + + void LoadGraphInfo() { + // Load katz centrality scores from files + std::ifstream KatzFile("katz_cent", std::ifstream::in); + if (!KatzFile.good()) { + Printf("Missing graph analysis file.\n"); + exit(1); + } + size_t EdgeId; + double KatzScores; + while (KatzFile >> EdgeId >> KatzScores) { + EdgeCentralityScore[EdgeId] = KatzScores; + } + KatzFile.close(); + + // Load adjacency list of program's inter-procedural CFG i.e., node2child + // mapping. + std::ifstream ChildFile("child_node", std::ifstream::in); + if (!ChildFile.good()) { + Printf("Missing graph analysis file.\n"); + exit(1); + } + std::string Line; + while (std::getline(ChildFile, Line)) { + std::istringstream ss(Line); + size_t NodeId, ChildId; + ss >> NodeId; + while (ss >> ChildId) { + Child[NodeId].push_back(ChildId); + } + } + ChildFile.close(); + + // Load adjacency list of reverse inter-procedural CFG i.e., node2parent + // mapping. + std::ifstream ParentFile("parent_node", std::ifstream::in); + if (!ParentFile.good()) { + Printf("Missing graph analysis file.\n"); + exit(1); + } + while (std::getline(ParentFile, Line)) { + std::istringstream ss(Line); + size_t NodeId, ParentId; + ss >> NodeId; + while (ss >> ParentId) { + Parent[NodeId].push_back(ParentId); + } + } + ParentFile.close(); + + // read all border edges + std::ifstream EdgeFile("border_edges", std::ifstream::in); + if (!EdgeFile.good()) { + Printf("Missing graph analysis file.\n"); + exit(1); + } + uint32_t ParentId, ChildId; + while (EdgeFile >> ParentId >> ChildId) { + BorderEdge.push_back(std::pair(ParentId, ChildId)); + } + EdgeFile.close(); + } size_t size() const { return Inputs.size(); } size_t SizeInBytes() const { size_t Res = 0; @@ -191,7 +336,10 @@ return Res; } void IncrementNumExecutedMutations() { NumExecutedMutations++; } - + void SetCurNumExecutedMutations(uint32_t Num) { + CurNumExecutedMutations = Num; + } // For unit testing only. + size_t GetAvgExecTime() { return AvgExecTime.count(); } size_t NumInputsThatTouchFocusFunction() { return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { return II->HasFocusFunction; @@ -204,6 +352,17 @@ }); } + // Record visited edges covered by current seed. 
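+  // For illustration, the sorted insertion below is equivalent to the
+  // following stand-alone snippet (arbitrary example values):
+  //   std::vector<uint32_t> Edges;                      // kept sorted
+  //   for (uint32_t E : {7u, 3u, 9u})
+  //     Edges.insert(std::lower_bound(Edges.begin(), Edges.end(), E), E);
+  //   // Edges == {3, 7, 9}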
+  void RecordVisitedEdge(uint32_t EdgeId) {
+    if (TmpEdgeSet.empty()) {
+      TmpEdgeSet.push_back(EdgeId);
+    } else {
+      auto Lower =
+          std::lower_bound(TmpEdgeSet.begin(), TmpEdgeSet.end(), EdgeId);
+      TmpEdgeSet.insert(Lower, EdgeId);
+    }
+  }
+
   bool empty() const { return Inputs.empty(); }
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
@@ -222,11 +381,32 @@
     II.NumFeatures = NumFeatures;
     II.NeverReduce = NeverReduce;
     II.TimeOfUnit = TimeOfUnit;
+    // K-Scheduler: maintain a cumulative sum of the total execution time over
+    // the seed corpus, then compute the average execution time. We do not
+    // count slow inputs, as they would drag down the fuzzing throughput.
+    if (TotalExecTime.count() == 0) {
+      TotalExecTime += TimeOfUnit;
+      AvgExecTime += TimeOfUnit;
+    } else {
+      if (TimeOfUnit.count() <=
+          (AvgExecTime.count() * (int)Kscheduler.PerThres)) {
+        TotalExecTime += TimeOfUnit;
+        AvgExecTime = TotalExecTime / (Inputs.size() - NumOfSlowInputs);
+        II.SlowInput = false;
+      } else {
+        NumOfSlowInputs += 1;
+        II.SlowInput = true;
+      }
+    }
+
+    II.EdgeSet = TmpEdgeSet; // Record the visited edge set of current seed.
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;
     // Assign maximal energy to the new seed.
     II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size());
+    II.KschedulerEnergy = 100000.0;
+    II.SumIncidence = static_cast<double>(RareFeatures.size());
     II.NeedsEnergyUpdate = false;
     std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
@@ -242,6 +422,7 @@
     if (II.DataFlowTraceForFocusFunction.empty() && BaseII)
       II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction;
     DistributionNeedsUpdate = true;
+    KschedulerDistributionNeedsUpdate = true;
     PrintCorpus();
     // ValidateFeatureSet();
     return &II;
@@ -284,6 +465,23 @@
     }
   }

+  // Find the index of a given border edge (ParentId, ChildId) in the sorted
+  // vector BorderEdge.
+  int FindBorderEdgeIdx(uint32_t ParentId, uint32_t ChildId) {
+    if (BorderEdge.empty())
+      return -1;
+
+    auto Lower =
+        std::lower_bound(BorderEdge.begin(), BorderEdge.end(),
+                         std::pair<uint32_t, uint32_t>(ParentId, ChildId));
+
+    if (Lower != BorderEdge.end() && Lower->first == ParentId &&
+        Lower->second == ChildId)
+      return Lower - BorderEdge.begin();
+    else
+      return -1;
+  }
+
   void Replace(InputInfo *II, const Unit &U,
                std::chrono::microseconds TimeOfUnit) {
     assert(II->U.size() > U.size());
@@ -293,21 +491,59 @@
     Hashes.insert(Sha1ToString(II->Sha1));
     II->U = U;
     II->Reduced = true;
-    II->TimeOfUnit = TimeOfUnit;
+    DistributionNeedsUpdate = true;
+
+    if (Kscheduler.Enabled) {
+      KschedulerDistributionNeedsUpdate = true;
+
+      // Subtract the runtime cost of the old input from the sum
+      // of total execution time over the seed corpus.
+      if (II->SlowInput)
+        NumOfSlowInputs -= 1;
+      else
+        TotalExecTime -= II->TimeOfUnit;
+
+      II->ComputedBefore = false;
+      II->FuzzedBefore = true;
+      II->KschedulerEnergy = 100000.0;
+
+      II->EdgeSet.clear(); // The set of edges covered by current input.
+      II->EdgeSet = TmpEdgeSet;
+      II->BorderEdgeIdx.clear(); // Index table of BorderEdgeSet for current input.
+      II->BorderEdgeSet.clear(); // Set of border edges for current input.
+
+      II->TimeOfUnit = TimeOfUnit;
+      if (TimeOfUnit.count() <=
+          (AvgExecTime.count() * (int)Kscheduler.PerThres)) {
+        TotalExecTime += TimeOfUnit;
+        AvgExecTime = TotalExecTime / (Inputs.size() - NumOfSlowInputs);
+        II->SlowInput = false;
+      } else {
+        NumOfSlowInputs += 1;
+        if (Inputs.size() - NumOfSlowInputs > 0)
+          AvgExecTime = TotalExecTime / (Inputs.size() - NumOfSlowInputs);
+        else
+          AvgExecTime = std::chrono::microseconds(0);
+        II->SlowInput = true;
+      }
+    } else {
+      II->TimeOfUnit = TimeOfUnit;
+    }
   }

   bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
   bool HasUnit(const std::string &H) { return Hashes.count(H); }
-  InputInfo &ChooseUnitToMutate(Random &Rand) {
-    InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
+  InputInfo &ChooseUnitToMutate(Random &Rand, size_t CurTime) {
+    InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand, CurTime)];
     assert(!II.U.empty());
     return II;
   }

-  InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) {
+  InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist,
+                                       size_t CurTime) {
     if (!UniformDist) {
-      return ChooseUnitToMutate(Rand);
+      return ChooseUnitToMutate(Rand, CurTime);
     }
     InputInfo &II = *Inputs[Rand(Inputs.size())];
     assert(!II.U.empty());
@@ -315,8 +551,9 @@
   }

   // Returns an index of random unit from the corpus to mutate.
-  size_t ChooseUnitIdxToMutate(Random &Rand) {
+  size_t ChooseUnitIdxToMutate(Random &Rand, size_t CurTime) {
     UpdateCorpusDistribution(Rand);
+    KschedulerUpdateCorpusDistribution(Rand, CurTime);
     size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
     assert(Idx < Inputs.size());
     return Idx;
@@ -354,12 +591,98 @@
     DeleteFile(II);
     Unit().swap(II.U);
     II.Energy = 0.0;
+    II.KschedulerEnergy = 0.0;
+    if (!II.SlowInput) {
+      // Subtract the runtime cost of the old input from the sum of
+      // total execution time over the seed corpus.
+      TotalExecTime -= II.TimeOfUnit;
+      NumOfSlowInputs += 1;
+      AvgExecTime = TotalExecTime / (Inputs.size() - NumOfSlowInputs);
+      II.SlowInput = true;
+    }
     II.NeedsEnergyUpdate = false;
     DistributionNeedsUpdate = true;
+    KschedulerDistributionNeedsUpdate = true;
     if (FeatureDebug)
       Printf("EVICTED %zd\n", Idx);
   }

+  // Compute energy for a seed based on its Katz centrality and its runtime
+  // cost.
+  void ComputeEnergy(InputInfo *II,
+                     std::chrono::microseconds AverageUnitExecutionTime) {
+    II->KschedulerEnergy = 0.0;
+    // Identify border edges for a seed when we encounter it for the first
+    // time, then save the border edges into BorderEdgeSet.
+    if (II->BorderEdgeSet.size() == 0) {
+      // Loop over every visited node of the seed.
+      for (auto NodeId : II->EdgeSet) {
+        // Get the child nodes of each visited node of the seed.
+        for (auto ChildId : Child[NodeId]) {
+          // If the child node is not visited while the parent node is, we
+          // have found a border edge, defined as (NodeId, ChildId).
+          if (GlobalEdgeFreqs[ChildId] == 0) {
+            double TmpEdgeWeight = 0.0;
+            int BorderEdgeIdx = FindBorderEdgeIdx(NodeId, ChildId);
+            if (BorderEdgeIdx != -1) {
+              // case 1: the cached weight is non-zero. We computed this edge
+              // weight before; just retrieve it from the BorderEdgeWeight
+              // cache table.
+              if (BorderEdgeWeight[BorderEdgeIdx] != 0.0) {
+                TmpEdgeWeight = BorderEdgeWeight[BorderEdgeIdx];
+              }
+              // case 2: the cached weight is zero. We have not computed this
+              // edge weight yet. First compute it, then save it into the
+              // border edge weight table.
+              else {
+                TmpEdgeWeight = EdgeCentralityScore[ChildId] *
+                                sqrt(log(CurNumExecutedMutations) /
+                                     GlobalEdgeFreqs[NodeId]);
+                BorderEdgeWeight[BorderEdgeIdx] = TmpEdgeWeight;
+              }
+            }
+            // Save current seed's border edges into its own BorderEdgeSet
+            // cache so that we don't need to scan the inter-procedural CFG
+            // again to identify them next time.
+            II->SaveBorderEdge(NodeId, ChildId, BorderEdgeIdx);
+
+            // The seed energy equals the sum of all its border edge weights.
+            II->KschedulerEnergy += TmpEdgeWeight;
+          }
+        }
+      }
+    }
+    // We have identified the border edges for this seed before, so read them
+    // directly from the cached BorderEdgeIdx.
+    else {
+      for (auto BorderEdgeId : II->BorderEdgeIdx) {
+        double TmpBorderEdgeWeight = 0.0;
+        if (BorderEdgeId != -1)
+          TmpBorderEdgeWeight = BorderEdgeWeight[BorderEdgeId];
+        II->KschedulerEnergy += TmpBorderEdgeWeight;
+      }
+    }
+
+    // Slow seeds tend to have a large Katz centrality score yet decrease the
+    // fuzzing throughput, so we penalize them with a performance score.
+    // NOTE: we ONLY penalize seeds with extremely large runtime overhead, but
+    // NEVER boost seeds with small runtime overhead, because seeds with small
+    // runtime overhead are not necessarily good/promising/interesting seeds.
+    // A promising seed should have a large Katz centrality and a NOT-large
+    // runtime overhead.
+    uint32_t CurPerfScore = 100;
+    if (II->TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
+      CurPerfScore = 10;
+    else if (II->TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
+      CurPerfScore = 25;
+    else if (II->TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
+      CurPerfScore = 50;
+    else if (II->TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
+      CurPerfScore = 75;
+
+    II->KschedulerPerfScore = CurPerfScore;
+    II->KschedulerEnergy *= CurPerfScore;
+  }
+
   void AddRareFeature(uint32_t Idx) {
     // Maintain *at least* TopXRarestFeatures many rare features
     // and all features with a frequency below ConsideredRare.
@@ -412,6 +735,95 @@
     DistributionNeedsUpdate = true;
   }

+  // Log feature and edge coverage every 5 minutes and write the result into
+  // the file "cov_stats".
+  void LogCoverage(int CurTime, size_t Cov, size_t FeatureCov) {
+    int ElapsedTime = CurTime - LastLogTime;
+    if (ElapsedTime < 300)
+      return;
+
+    std::ofstream LogFile;
+    LogFile.open("cov_stats", std::ofstream::out | std::ofstream::app);
+    LogFile << "time: " << CurTime / 60 << " cov: " << Cov
+            << " feat: " << FeatureCov << "\n";
+    LogFile.close();
+    LastLogTime = CurTime;
+  }
+
+  // Write visited node information into a file which will be consumed by the
+  // dynamic graph analysis module. Reload the latest graph centrality scores.
+  // Also log all seed weights and edge weights for debugging purposes.
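+  // Rough sketch of the file-based handshake (the external analysis module
+  // itself is not part of this patch):
+  //   cur_coverage  - written here: space-separated ids of all visited edges
+  //   signal        - written here: a single "1" telling the module that new
+  //                   coverage data is available
+  //   dyn_katz_cent - read back: one "EdgeId KatzScore" pair per line, same
+  //                   format as the initial "katz_cent" file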
+  void UpdateCentralityScore(int CurTime) {
+    int ElapsedTime = CurTime - LastDumpTime;
+    if (ElapsedTime < 120)
+      return;
+
+    LogVisitedNode();
+    ReloadCentralistScore();
+    std::ofstream LogFile;
+    LogFile.open("weight_dump");
+    size_t Idx = 0;
+    CacheBorderEdgeWeight();
+    LogFile << "### " << CurNumExecutedMutations << "\n";
+    for (auto E : BorderEdge) {
+      uint32_t ParentId = E.first;
+      uint32_t ChildId = E.second;
+      // Log all border edge weights.
+      if (EdgeCentralityScore[ChildId] != 0.0 && GlobalEdgeFreqs[ParentId] &&
+          !GlobalEdgeFreqs[ChildId]) {
+        LogFile << BorderEdgeWeight[Idx] << " " << EdgeCentralityScore[ChildId]
+                << " "
+                << sqrt(log(CurNumExecutedMutations) /
+                        GlobalEdgeFreqs[ParentId])
+                << " " << GlobalEdgeFreqs[ParentId] << "\n";
+      }
+      Idx += 1;
+    }
+
+    LogFile << "$$$$$$$$$$$$$$$$$$$\n";
+    for (auto II : Inputs) {
+      LogFile << II->KschedulerEnergy << " " << II->BorderEdgeIdx.size() << " "
+              << II->KschedulerPerfScore << " " << II->NumExecutedMutations
+              << "\n";
+    }
+    LogFile.close();
+    LastDumpTime = CurTime;
+  }
+
+  // Log all visited nodes into a file for building the edge horizon graph.
+  void LogVisitedNode() {
+    std::ofstream LogFile;
+    LogFile.open("cur_coverage");
+
+    for (size_t i = 0; i < kEdgeSetSize; i++) {
+      if (GlobalEdgeFreqs[i] > 0)
+        LogFile << i << " ";
+    }
+
+    LogFile.close();
+
+    std::ofstream SignalFile;
+    SignalFile.open("signal");
+    SignalFile << "1\n";
+    SignalFile.close();
+  }
+
+  // Load updated Katz centrality scores.
+  void ReloadCentralistScore() {
+    std::ifstream KatzFile;
+    KatzFile.open("dyn_katz_cent");
+
+    if (KatzFile.good() == false)
+      return;
+
+    int EdgeId;
+    double KatzScore;
+    while (KatzFile >> EdgeId >> KatzScore) {
+      EdgeCentralityScore[EdgeId] = KatzScore;
+    }
+    KatzFile.close();
+  }
+
   bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
     assert(NewSize);
     Idx = Idx % kFeatureSetSize;
@@ -440,6 +852,63 @@
     return false;
   }

+  // K-Scheduler: increment frequency of feature Idx globally and locally.
+  void KschedulerUpdateFeatureFrequency(InputInfo *II, size_t Idx) {
+    // Convert the feature idx to an edge idx.
+    uint32_t EdgeId = (Idx / 8) % kFeatureSetSize;
+
+    // Hit a new edge: check whether the new edge is a border edge associated
+    // with other seeds. If so, update those seeds' energy.
+    if (GlobalEdgeFreqs[EdgeId] == 0) {
+      for (auto ParentId : Parent[EdgeId]) {
+        if (GlobalEdgeFreqs[ParentId]) {
+          for (auto TmpII : Inputs) {
+            if (TmpII->KschedulerEnergy != 0.0 && TmpII->ComputedBefore) {
+              int TmpIdx = TmpII->SearchAndDeleteBorderEdge(ParentId, EdgeId);
+              // The current border edge is used in this seed's energy
+              // computation; delete its weight from the seed's energy.
+              if ((TmpIdx != -2) && (TmpIdx != -1)) {
+                TmpII->KschedulerEnergy -=
+                    (BorderEdgeWeight[TmpIdx] * TmpII->KschedulerPerfScore);
+              }
+            }
+          }
+        }
+      }
+    }
+
+    // Increment the global edge hit count.
+    GlobalEdgeFreqs[EdgeId]++;
+    // Record edge coverage locally for the current seed.
+    RecordVisitedEdge(EdgeId);
+  }
+
+  void SetDistributionNeedsUpdate() {
+    KschedulerDistributionNeedsUpdate = true;
+  }
+  void ClearTmpEdgeSet() { TmpEdgeSet.clear(); }
+
+  // Weight computation for each border edge. We pre-compute all border edge
+  // weights and cache the results in a table to avoid duplicate computation.
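+  // Worked example with illustrative numbers: for an unvisited child with
+  // Katz score 1.2, a parent hit count of 1, and 2 executed mutations in
+  // total, the cached weight is 1.2 * sqrt(log(2) / 1) ~= 1.2 * 0.83 ~= 1.0
+  // (log is the natural logarithm here).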
+  void CacheBorderEdgeWeight() {
+    int Idx = 0;
+    CurNumExecutedMutations = NumExecutedMutations;
+    for (auto E : BorderEdge) {
+      uint32_t ParentId = E.first;
+      uint32_t ChildId = E.second;
+
+      if ((EdgeCentralityScore[ChildId] != 0.0) &&
+          (GlobalEdgeFreqs[ParentId]) && (!GlobalEdgeFreqs[ChildId])) {
+        // weight = Katz_score_child *
+        //          sqrt(log(total_num_executed) / hit_count_parent)
+        BorderEdgeWeight[Idx] =
+            EdgeCentralityScore[ChildId] *
+            sqrt(log(CurNumExecutedMutations) / GlobalEdgeFreqs[ParentId]);
+      }
+      Idx += 1;
+    }
+  }
+
   // Increment frequency of feature Idx globally and locally.
   void UpdateFeatureFrequency(InputInfo *II, size_t Idx) {
     uint32_t Idx32 = Idx % kFeatureSetSize;
@@ -467,6 +936,14 @@
   size_t NumFeatures() const { return NumAddedFeatures; }
   size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }

+  // Only for unit testing.
+  uint32_t GetGlobalEdgeFreqs(uint32_t EdgeId) {
+    return GlobalEdgeFreqs[EdgeId];
+  }
+  void SetGlobalEdgeFreqs(uint32_t EdgeId, uint32_t Cnt) {
+    GlobalEdgeFreqs[EdgeId] = Cnt;
+  }
+
 private:
   static const bool FeatureDebug = false;
@@ -564,6 +1041,125 @@
     CorpusDistribution = std::piecewise_constant_distribution<double>(
         Intervals.begin(), Intervals.end(), Weights.begin());
   }
+
+  // Updates the probability distribution for the units in the corpus.
+  // Must be called in three cases:
+  // 1. every 2 minutes;
+  // 2. when new seeds are generated and K-Scheduler has not computed a
+  //    centrality weight for them yet, because their NumExecutedMutations is
+  //    less than MinNumMutationsForEachSeed;
+  // 3. for new seeds whose NumExecutedMutations is greater than
+  //    MinNumMutationsForEachSeed, so that K-Scheduler starts to compute the
+  //    centrality-based weight for them.
+  //
+  // Hypothesis: inputs that have the highest graph centrality scores are
+  // interesting, i.e., they can reach the maximum amount of unvisited code
+  // regions.
+  void KschedulerUpdateCorpusDistribution(Random &Rand, int CurTime) {
+
+    if (!Kscheduler.Enabled)
+      return;
+    if ((!KschedulerDistributionNeedsUpdate) &&
+        (CurTime - LastComputeTime < 120))
+      return;
+
+    KschedulerDistributionNeedsUpdate = false;
+
+    size_t N = Inputs.size();
+    assert(N);
+    Intervals.resize(N + 1);
+    Weights.resize(N);
+    std::iota(Intervals.begin(), Intervals.end(), 0);
+
+    bool VanillaSchedule = true;
+
+    // case1: Compute energy for all seeds every 2 minutes.
+    if (CurTime - LastComputeTime > 120) {
+      // Compute the weight for each border edge and save the results into a
+      // cache table.
+      CacheBorderEdgeWeight();
+      for (auto II : Inputs) {
+        // Compute energy once the minimum number of mutations on a seed has
+        // been satisfied. We also skip seeds which have no border edges.
+        if (II->FuzzedBefore && (II->KschedulerEnergy != 0.0) &&
+            (II->NumExecutedMutations >=
+             Kscheduler.MinNumMutationsForEachSeed)) {
+          ComputeEnergy(II, AvgExecTime);
+          II->ComputedBefore = true;
+        }
+      }
+      LastComputeTime = CurTime;
+    }
+
+    // case2: Compute energy only for new seeds whose minimum mutations have
+    // been satisfied. We also skip seeds which have no border edges.
+    else {
+      for (auto II : Inputs) {
+        if (!II->ComputedBefore && (II->KschedulerEnergy != 0.0) &&
+            (II->NumExecutedMutations >=
+             Kscheduler.MinNumMutationsForEachSeed)) {
+          ComputeEnergy(II, AvgExecTime);
+          II->ComputedBefore = true;
+        }
+      }
+    }
+
+    // case3: Use the initial weight for new seeds whose mutation count is
+    // still less than MinNumMutationsForEachSeed.
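+    // (The initial KschedulerEnergy of 100000.0 set in AddToCorpus acts as
+    // that weight, so brand-new seeds start with a high weight until they
+    // have been mutated MinNumMutationsForEachSeed times.) Each Weights[i]
+    // then feeds the piecewise-constant distribution built below; e.g. with
+    // Weights = {0, 2, 6}, seed 2 is picked ~75% of the time and seed 0 never.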
+    for (size_t i = 0; i < N; i++) {
+
+      if (Inputs[i]->NumFeatures == 0) {
+        // If the seed doesn't represent any features, assign zero energy.
+        Weights[i] = 0.;
+      } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor >
+                 NumExecutedMutations / Inputs.size()) {
+        // If the seed was fuzzed a lot more than average, assign zero energy.
+        Weights[i] = 0.;
+      } else {
+        // Otherwise, simply assign the computed energy.
+        Weights[i] = Inputs[i]->KschedulerEnergy;
+      }
+
+      // If energy for all seeds is zero, fall back to vanilla schedule.
+      if (Weights[i] > 0.0)
+        VanillaSchedule = false;
+
+      if (Weights[i] == 0.0) {
+        if (!Inputs[i]->SlowInput) {
+          // Subtract the runtime cost of the old input from the sum of
+          // total execution time over the seed corpus.
+          TotalExecTime -= Inputs[i]->TimeOfUnit;
+          NumOfSlowInputs += 1;
+          if (Inputs.size() - NumOfSlowInputs > 0)
+            AvgExecTime = TotalExecTime / (Inputs.size() - NumOfSlowInputs);
+          else
+            AvgExecTime = std::chrono::microseconds(0);
+          Inputs[i]->SlowInput = true;
+        }
+      }
+    }
+
+    if (VanillaSchedule) {
+      for (size_t i = 0; i < N; i++)
+        Weights[i] =
+            Inputs[i]->NumFeatures
+                ? static_cast<double>((i + 1) *
+                                      (Inputs[i]->HasFocusFunction ? 1000 : 1))
+                : 0.;
+    }
+
+    if (FeatureDebug) {
+      for (size_t i = 0; i < N; i++)
+        Printf("%zd ", Inputs[i]->NumFeatures);
+      Printf("SCORE\n");
+      for (size_t i = 0; i < N; i++)
+        Printf("%f ", Weights[i]);
+      Printf("Weights\n");
+    }
+    CorpusDistribution = std::piecewise_constant_distribution<double>(
+        Intervals.begin(), Intervals.end(), Weights.begin());
+  }
+
   std::piecewise_constant_distribution<double> CorpusDistribution;

   std::vector<double> Intervals;
@@ -572,6 +1168,13 @@
   std::unordered_set<std::string> Hashes;
   std::vector<InputInfo *> Inputs;
+  int LastComputeTime = 0;
+  int LastLogTime = 0;
+  int LastDumpTime = 0;
+  uint32_t NumOfSlowInputs = 0;
+  // Average execution time for all mutations generated by libFuzzer.
+  std::chrono::microseconds AvgExecTime = std::chrono::microseconds(0);
+  std::chrono::microseconds TotalExecTime = std::chrono::microseconds(0);
   size_t NumAddedFeatures = 0;
   size_t NumUpdatedFeatures = 0;
   uint32_t InputSizesPerFeature[kFeatureSetSize];
@@ -581,8 +1184,25 @@
   uint16_t FreqOfMostAbundantRareFeature = 0;
   uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {};
   std::vector<uint32_t> RareFeatures;
-  std::string OutputCorpus;
+
+  bool KschedulerDistributionNeedsUpdate = false;
+
+  // Katz centrality score for each edge.
+  double EdgeCentralityScore[kEdgeSetSize] = {0.0};
+  // Edge weight for a border edge (ParentId, ChildId), defined as
+  // EdgeCentralityScore(ChildId) *
+  // sqrt(log(Total_Num_Execution) / HitCount(ParentId)).
+  double BorderEdgeWeight[kEdgeSetSize] = {0.0};
+
+  std::vector<uint32_t> Child[kEdgeSetSize];  // Node-to-children adjacency list.
+  std::vector<uint32_t> Parent[kEdgeSetSize]; // Node-to-parent adjacency list.
+  std::vector<uint32_t> TmpEdgeSet; // Edge coverage set of the input currently
+                                    // being executed.
+
+  // A vector of all possible border edges, defined as pairs (ParentId, ChildId).
+  std::vector<std::pair<uint32_t, uint32_t>> BorderEdge;
+  // Hit counter for global edge coverage.
+  uint32_t GlobalEdgeFreqs[kEdgeSetSize] = {};
 };

 } // namespace fuzzer
diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -548,7 +548,7 @@
   // Get coverage for the testcase without modifications.
F->ExecuteCallback(C.data(), C.size()); InitialFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) { + TPC.CollectFeatures([&](size_t Feature, bool CountEdge) { InitialFeatures.push_back(Feature); }); @@ -574,7 +574,7 @@ // Get coverage for testcase with masked occurrences of dictionary unit. F->ExecuteCallback(Data.data(), Data.size()); ModifiedFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) { + TPC.CollectFeatures([&](size_t Feature, bool CountEdge) { ModifiedFeatures.push_back(Feature); }); @@ -789,6 +789,20 @@ Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures; Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime; + Options.Kscheduler = Flags.kscheduler; + Options.KschedulerMinNumMutationsForEachSeed = + (size_t)Flags.kscheduler_min_num_mutations_for_each_seed; + Options.KschedulerPerThres = (size_t)Flags.kscheduler_per_thres; + if (Options.Kscheduler) + Printf("INFO: Running with K-Schedule ( %d, %d).\n", + Options.KschedulerMinNumMutationsForEachSeed, + Options.KschedulerPerThres); + struct KschedulerOptions Kscheduler; + Kscheduler.Enabled = Options.Kscheduler; + Kscheduler.MinNumMutationsForEachSeed = + Options.KschedulerMinNumMutationsForEachSeed; + Kscheduler.PerThres = Options.KschedulerPerThres; + unsigned Seed = Flags.seed; // Initialize Seed. if (Seed == 0) @@ -809,7 +823,7 @@ Random Rand(Seed); auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); + auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic, Kscheduler); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); for (auto &U: Dictionary) diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -199,7 +199,11 @@ "time. Inputs with lower execution time get scheduled more (up to 30x). " "Note that, if 1, fuzzer stops from being deterministic even if a " "non-zero random seed is given.") - +FUZZER_FLAG_INT(kscheduler, 0, "Experimental. Enables K-Schedule.") +FUZZER_FLAG_INT(kscheduler_per_thres, 4, "Experimental. PerfScore threshold. ") +FUZZER_FLAG_INT(kscheduler_min_num_mutations_for_each_seed, 100, "Experimental. 
If " + "K-Scheduler is enabled, we generate at least a specified number of mutations " + "for each seed before using graph centrality score to compute its seed weight.") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -517,11 +517,15 @@ UniqFeatureSetTmp.clear(); size_t FoundUniqFeaturesOfII = 0; size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](uint32_t Feature) { + Corpus.ClearTmpEdgeSet(); + TPC.CollectFeatures([&](uint32_t Feature, bool CountEdge) { if (Corpus.AddFeature(Feature, static_cast(Size), Options.Shrink)) UniqFeatureSetTmp.push_back(Feature); if (Options.Entropic) Corpus.UpdateFeatureFrequency(II, Feature); + if (Options.Kscheduler && CountEdge) + Corpus.KschedulerUpdateFeatureFrequency(II, Feature); + if (Options.ReduceInputs && II && !II->NeverReduce) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) @@ -653,7 +657,7 @@ if (Options.Verbosity) { Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); MD.PrintMutationSequence(Options.Verbosity >= 2); - Printf("\n"); + Printf(" ElapsedTime: %d\n", (int)(secondsSinceProcessStartUp() / 60)); } } @@ -715,10 +719,12 @@ void Fuzzer::MutateAndTestOne() { MD.StartMutationSequence(); - auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); + auto &II = + Corpus.ChooseUnitToMutate(MD.GetRand(), secondsSinceProcessStartUp()); if (Options.DoCrossOver) { auto &CrossOverII = Corpus.ChooseUnitToCrossOverWith( - MD.GetRand(), Options.CrossOverUniformDist); + MD.GetRand(), Options.CrossOverUniformDist, + secondsSinceProcessStartUp()); MD.SetCrossOverWith(&CrossOverII.U); } const auto &U = II.U; @@ -767,6 +773,19 @@ } II.NeedsEnergyUpdate = true; + + // Log coverage every 5 minutes. + Corpus.LogCoverage(secondsSinceProcessStartUp(), TPC.GetTotalPCCoverage(), + Corpus.NumFeatures()); + + // K-Scheduler + if (Options.Kscheduler) { + II.FuzzedBefore = true; + Corpus.UpdateCentralityScore(secondsSinceProcessStartUp()); + if (!II.ComputedBefore && (II.NumExecutedMutations > + Options.KschedulerMinNumMutationsForEachSeed)) + Corpus.SetDistributionNeedsUpdate(); + } } void Fuzzer::PurgeAllocator() { diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp --- a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp @@ -238,9 +238,10 @@ // * Then, all other files, smallest first. 
std::set Features; if (IsSetCoverMerge) - TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); }); + TPC.CollectFeatures( + [&](size_t Feature, bool CountEdge) { Features.insert(Feature); }); else - TPC.CollectFeatures([&](size_t Feature) { + TPC.CollectFeatures([&](size_t Feature, bool CountEdge) { if (AllFeatures.insert(Feature).second) Features.insert(Feature); }); diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -51,6 +51,9 @@ size_t EntropicFeatureFrequencyThreshold = 0xFF; size_t EntropicNumberOfRarestFeatures = 100; bool EntropicScalePerExecTime = false; + bool Kscheduler = false; + size_t KschedulerMinNumMutationsForEachSeed = 100; + size_t KschedulerPerThres = 4; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.h b/compiler-rt/lib/fuzzer/FuzzerTracePC.h --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.h +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.h @@ -237,13 +237,13 @@ template // void Callback(uint32_t Feature) ATTRIBUTE_NO_SANITIZE_ADDRESS ATTRIBUTE_NOINLINE size_t TracePC::CollectFeatures(Callback HandleFeature) const { - auto Handle8bitCounter = [&](size_t FirstFeature, - size_t Idx, uint8_t Counter) { - if (UseCounters) - HandleFeature(static_cast(FirstFeature + Idx * 8 + - CounterToFeature(Counter))); - else - HandleFeature(static_cast(FirstFeature + Idx)); + auto Handle8bitCounter = [&](size_t FirstFeature, size_t Idx, + uint8_t Counter) { + if (UseCounters) { + uint32_t feat = FirstFeature + Idx * 8 + CounterToFeature(Counter); + HandleFeature(feat, true); + } else + HandleFeature(static_cast(FirstFeature + Idx), true); }; size_t FirstFeature = 0; @@ -263,7 +263,7 @@ if (UseValueProfileMask) { ValueProfileMap.ForEach([&](size_t Idx) { - HandleFeature(static_cast(FirstFeature + Idx)); + HandleFeature(static_cast(FirstFeature + Idx), false); }); FirstFeature += ValueProfileMap.SizeInBits(); } @@ -283,8 +283,8 @@ assert(StackDepthStepFunction(1024 * 1024) == 144); if (auto MaxStackOffset = GetMaxStackOffset()) { - HandleFeature(static_cast( - FirstFeature + StackDepthStepFunction(MaxStackOffset / 8))); + uint32_t feat = FirstFeature + StackDepthStepFunction(MaxStackOffset / 8); + HandleFeature(feat, false); FirstFeature += StackDepthStepFunction(std::numeric_limits::max()); } diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -107,7 +107,7 @@ if (size_t NumExtraCounters = ExtraCountersEnd() - ExtraCountersBegin()) Printf("INFO: %zd Extra Counters\n", NumExtraCounters); - size_t MaxFeatures = CollectFeatures([](uint32_t) {}); + size_t MaxFeatures = CollectFeatures([](uint32_t, bool) {}); if (MaxFeatures > std::numeric_limits::max()) Printf("WARNING: The coverage PC tables may produce up to %zu features.\n" "This exceeds the maximum 32-bit value. 
Some features may be\n" diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -17,6 +17,8 @@ #include "FuzzerRandom.h" #include "FuzzerTracePC.h" #include "gtest/gtest.h" +#include +#include #include #include #include @@ -631,7 +633,8 @@ DataFlowTrace DFT; Random Rand(0); struct EntropicOptions Entropic = {false, 0xFF, 100, false}; - std::unique_ptr C(new InputCorpus("", Entropic)); + struct KschedulerOptions Kscheduler = {false, 100, 4}; + std::unique_ptr C(new InputCorpus("", Entropic, Kscheduler)); size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) @@ -644,7 +647,7 @@ std::vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { - Hist[C->ChooseUnitIdxToMutate(Rand)]++; + Hist[C->ChooseUnitIdxToMutate(Rand, 0)]++; } for (size_t i = 0; i < N; i++) { // A weak sanity check that every unit gets invoked. @@ -655,8 +658,9 @@ TEST(Corpus, Replace) { DataFlowTrace DFT; struct EntropicOptions Entropic = {false, 0xFF, 100, false}; + struct KschedulerOptions Kscheduler = {false, 100, 4}; std::unique_ptr C( - new InputCorpus(/*OutputCorpus*/ "", Entropic)); + new InputCorpus(/*OutputCorpus*/ "", Entropic, Kscheduler)); InputInfo *FirstII = C->AddToCorpus(Unit{0x01, 0x00}, /*NumFeatures*/ 1, /*MayDeleteFile*/ false, /*HasFocusFunction*/ false, @@ -1367,7 +1371,8 @@ size_t Index; // Create input corpus with default entropic configuration struct EntropicOptions Entropic = {true, 0xFF, 100, false}; - std::unique_ptr C(new InputCorpus("", Entropic)); + struct KschedulerOptions Kscheduler = {false, 100, 4}; + std::unique_ptr C(new InputCorpus("", Entropic, Kscheduler)); std::unique_ptr II(new InputInfo()); C->AddRareFeature(FeatIdx1); @@ -1404,7 +1409,8 @@ TEST(Entropic, ComputeEnergy) { const double Precision = 0.01; struct EntropicOptions Entropic = {true, 0xFF, 100, false}; - std::unique_ptr C(new InputCorpus("", Entropic)); + struct KschedulerOptions Kscheduler = {false, 100, 4}; + std::unique_ptr C(new InputCorpus("", Entropic, Kscheduler)); std::unique_ptr II(new InputInfo()); std::vector> FeatureFreqs = { {1, 3}, {2, 3}, {3, 3}}; @@ -1424,6 +1430,101 @@ EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); } +TEST(Kscheduler, UpdateFrequency) { + const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; + // Each EdgeIdx maps to a FeatIdx following the equation (EdgeIdx=FeatIdx/8). 
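+  // For example, with the conversion used in KschedulerUpdateFeatureFrequency
+  // (EdgeId = (FeatIdx / 8) % kFeatureSetSize): 0/8 = 0, 42/8 = 5, 12/8 = 1,
+  // and 26/8 = 3, which matches the edge indices below.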
+ const size_t EdgeIdx1 = 0, EdgeIdx2 = 5, EdgeIdx3 = 1, EdgeIdx4 = 3; + // Enable Kscheduler + struct EntropicOptions Entropic = {false, 0xFF, 100, false}; + struct KschedulerOptions Kscheduler = {true, 100, 4}; + + // Generate dummy graph analysis file + std::ofstream KatzFile; + KatzFile.open("katz_cent"); + KatzFile << "1 1.2\n2 0.5\n3 0.5\n4 1.7\n5 0.5\n6 1.2\n7 0.5\n8 0.5\n"; + KatzFile.close(); + + std::ofstream ChildFile; + ChildFile.open("child_node"); + ChildFile << "1 2 3\n2\n3\n4 5 6\n5\n6 7 8\n7\n8\n"; + ChildFile.close(); + + std::ofstream ParentFile; + ParentFile.open("parent_node"); + ParentFile << "1\n2 1\n3 1\n4\n5 4\n6 4\n7 6\n8 6\n"; + ParentFile.close(); + + std::ofstream EdgeFile; + EdgeFile.open("border_edges"); + EdgeFile << "1 2\n1 3\n4 5\n4 6\n6 7\n6 8\n"; + EdgeFile.close(); + + std::unique_ptr C(new InputCorpus("", Entropic, Kscheduler)); + std::unique_ptr II(new InputInfo()); + + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx1); + EXPECT_EQ(C->GetGlobalEdgeFreqs(EdgeIdx1), 1); + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx1); + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx2); + EXPECT_EQ(C->GetGlobalEdgeFreqs(EdgeIdx1), 2); + EXPECT_EQ(C->GetGlobalEdgeFreqs(EdgeIdx2), 1); + + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx3); + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx3); + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx3); + EXPECT_EQ(C->GetGlobalEdgeFreqs(EdgeIdx3), 3); + C->KschedulerUpdateFeatureFrequency(II.get(), FeatIdx4); + EXPECT_EQ(C->GetGlobalEdgeFreqs(EdgeIdx4), 1); +} + +TEST(Kscheduler, ComputeEnergy) { + const double Precision = 0.01; + // Enable Kscheduler + struct EntropicOptions Entropic = {false, 0xFF, 100, false}; + struct KschedulerOptions Kscheduler = {true, 100, 4}; + + // Generate dummy graph analysis file + std::ofstream KatzFile; + KatzFile.open("katz_cent"); + KatzFile << "1 1.2\n2 0.5\n3 0.5\n4 1.7\n5 0.5\n6 1.2\n7 0.5\n8 0.5\n"; + KatzFile.close(); + + std::ofstream ChildFile; + ChildFile.open("child_node"); + ChildFile << "1 2 3\n2\n3\n4 5 6\n5\n6 7 8\n7\n8\n"; + ChildFile.close(); + + std::ofstream ParentFile; + ParentFile.open("parent_node"); + ParentFile << "1\n2 1\n3 1\n4\n5 4\n6 4\n7 6\n8 6\n"; + ParentFile.close(); + + std::ofstream EdgeFile; + EdgeFile.open("border_edges"); + EdgeFile << "1 2\n1 3\n4 5\n4 6\n6 7\n6 8\n"; + EdgeFile.close(); + + std::unique_ptr C(new InputCorpus("", Entropic, Kscheduler)); + std::unique_ptr II1(new InputInfo()); + std::unique_ptr II2(new InputInfo()); + + II1->EdgeSet = {1, 2}; + II2->EdgeSet = {4, 5}; + II1->TimeOfUnit = std::chrono::microseconds(223); + II2->TimeOfUnit = std::chrono::microseconds(221); + C->SetGlobalEdgeFreqs(1, 1); + C->SetGlobalEdgeFreqs(2, 1); + C->SetGlobalEdgeFreqs(4, 1); + C->SetGlobalEdgeFreqs(5, 1); + C->SetCurNumExecutedMutations(2); + + C->ComputeEnergy(II1.get(), std::chrono::microseconds(200)); + C->ComputeEnergy(II2.get(), std::chrono::microseconds(200)); + + EXPECT_LT(SubAndSquare(II1->KschedulerEnergy, 41.627731), Precision); + EXPECT_LT(SubAndSquare(II2->KschedulerEnergy, 99.906553), Precision); +} + int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -82,6 +82,7 @@ "tune-cpu" attribute is absent it tunes according to the "target-cpu". * Fixed relocations against temporary symbols (e.g. 
in jump tables and constant pools) in large COFF object files. +* Auto-vectorization now targets SVE by default when available. Changes to the ARM Backend --------------------------