This is an archive of the discontinued LLVM Phabricator instance.

[CSSPGO] Even count distribution
ClosedPublic

Authored by spupyrev on Jan 31 2022, 11:47 AM.

Details

Summary

This is a modification of the count inference algorithm for inferring missing
counts of basic blocks and jumps based on sampling-based weights (e.g.,
collected via pseudo-probes). The goal is to distribute counts evenly among
the control-flow graph when there are multiple equally likely options. This is
in contrast with the earlier version, which created a valid flow in the CFG using
an arbitrarily chosen set of blocks/jumps.
Algorithmically, the change is a modification of the min-cost max-flow approach
that works by iteratively increasing flow along augmenting paths. In the new
version, we instead find an augmenting DAG (directed acyclic graph) and push the
flow evenly along all edges of the DAG. As a result, the counts are distributed
evenly not only among dangling (w/o sample weights) blocks but also among regular
basic blocks.

The algorithm is controlled by a new flag sample-profile-even-count-distribution
(default ON).

Evaluation:
This has been tested on several benchmarks (with CSSPGO):

  • For smallish SPEC17 binaries, I do not see a perf difference;
  • For clang-10 (17MB of hot code), there is a 0.5% - 1.5% improvement;
  • For gcc-8 (8MB of hot code), the win is in the range 0% - 0.5%.

In addition, added a few tests to verify correctness on simple instances.

Build-time impact:
With the change, the inference algorithm is ~2x-4x slower than the previous
version for (very) large binaries. However, the runtime is still close to linear,
and I did not observe any build-time regression.

Diff Detail

Event Timeline

spupyrev created this revision.Jan 31 2022, 11:47 AM
spupyrev requested review of this revision.Jan 31 2022, 11:47 AM
Herald added a project: Restricted Project. · View Herald TranscriptJan 31 2022, 11:47 AM
spupyrev retitled this revision from [CSSPGO] Even flow distribution to [CSSPGO] Even count distribution.Jan 31 2022, 12:07 PM
spupyrev edited the summary of this revision. (Show Details)
spupyrev added reviewers: wenlei, hoy, wlei.
spupyrev edited the summary of this revision. (Show Details)Jan 31 2022, 12:10 PM
spupyrev added a comment.EditedFeb 1 2022, 7:33 AM

Detailed perf measurements on clang, release_10 (speedups over the previous version of inference):
with CSSPGO+LTO:

benchmark1: 1.52% (stat sig)
benchmark2: 0.84% (stat sig)

with AutoFDO+LTO:

benchmark1: 1.72% (stat sig)
benchmark2: 0.61% (stat sig)

Perf measurements on gcc, release_83 (speedups over the previous version of inference):
with CSSPGO+LTO:

benchmark1: 0.16% (non-stat sig)
benchmark2: 0.34% (stat sig)
hoy added a comment.Feb 13 2022, 11:41 PM

Thanks for working on the smart algorithm and the offline illustration of the high-level idea. The performance win on clang-10 looks promising.

llvm/lib/Transforms/Utils/SampleProfileInference.cpp
197

nit: name it computeAugmentingPathCapacity?

301

nit: Edges[NodeIdx][EdgeIdx]?

333

Should the Discovery field be a boolean? Looks like this is the only place using it.

333

I was wondering if setting SampleProfileMaxDfsCalls=1 makes sense. It looks like whether Dst is reached does not depend on which predecessor the search starts with. Correct me if I'm wrong.

335

nit: Stack.emplace(Edge.Dst, 0)

354

Assert that the stack is not empty at this point?

398

Please add a message for the assertion.

482

What is an incident node?

spupyrev updated this revision to Diff 409292.Feb 16 2022, 9:21 AM
spupyrev marked 6 inline comments as done.
spupyrev edited the summary of this revision. (Show Details)

review comments

llvm/lib/Transforms/Utils/SampleProfileInference.cpp
333

Quoting my colleague:

I would keep it the way it is. Discovery and Finish times are well-known DFS concepts. Of course, here they have been adapted to our modified version, which can visit a node more than once. The marginal savings of switching to bool are not worth it.

333

That is a good point. We actually ran experiments with the flag but found that the default value of 10 is fine.

The tradeoff here is quality vs. speed: the more iterations we apply, the higher quality we get. We certainly need more than one iteration (thus, SampleProfileMaxDfsCalls>1); otherwise we get poor results. Increasing it beyond 10-20 doesn't significantly improve the quality, and the time savings of reducing the value from 10 to, say, 5 are minor.

spupyrev updated this revision to Diff 410543.Feb 22 2022, 8:26 AM

faster convergence

hoy accepted this revision.Feb 25 2022, 5:47 PM

LGTM.

This revision is now accepted and ready to land.Feb 25 2022, 5:47 PM
Herald added a project: Restricted Project. · View Herald TranscriptMar 2 2022, 7:37 AM
spupyrev updated this revision to Diff 412499.Mar 2 2022, 11:32 AM

update broken test

This revision was landed with ongoing or failed builds.Mar 2 2022, 1:12 PM
This revision was automatically updated to reflect the committed changes.