Diff 488388

llvm/include/llvm/Transforms/Utils/SampleProfileInference.h

Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
} // end namespace afdo_detail		} // end namespace afdo_detail

struct FlowJump;		struct FlowJump;

/// A wrapper of a binary basic block.		/// A wrapper of a binary basic block.
struct FlowBlock {		struct FlowBlock {
uint64_t Index;		uint64_t Index;
uint64_t Weight{0};		uint64_t Weight{0};
bool HasUnknownWeight{false};		bool HasUnknownWeight{true};
		bool IsUnlikely{false};
uint64_t Flow{0};		uint64_t Flow{0};
bool HasSelfEdge{false};
std::vector<FlowJump *> SuccJumps;		std::vector<FlowJump *> SuccJumps;
std::vector<FlowJump *> PredJumps;		std::vector<FlowJump *> PredJumps;

/// Check if it is the entry block in the function.		/// Check if it is the entry block in the function.
bool isEntry() const { return PredJumps.empty(); }		bool isEntry() const { return PredJumps.empty(); }

/// Check if it is an exit block in the function.		/// Check if it is an exit block in the function.
bool isExit() const { return SuccJumps.empty(); }		bool isExit() const { return SuccJumps.empty(); }
};		};

/// A wrapper of a jump between two basic blocks.		/// A wrapper of a jump between two basic blocks.
struct FlowJump {		struct FlowJump {
uint64_t Source;		uint64_t Source;
uint64_t Target;		uint64_t Target;
uint64_t Flow{0};		uint64_t Weight{0};
		bool HasUnknownWeight{true};
bool IsUnlikely{false};		bool IsUnlikely{false};
		uint64_t Flow{0};
};		};

/// A wrapper of binary function with basic blocks and jumps.		/// A wrapper of binary function with basic blocks and jumps.
struct FlowFunction {		struct FlowFunction {
		/// Basic blocks in the function.
std::vector<FlowBlock> Blocks;		std::vector<FlowBlock> Blocks;
		/// Jumps between the basic blocks.
std::vector<FlowJump> Jumps;		std::vector<FlowJump> Jumps;
/// The index of the entry block.		/// The index of the entry block.
uint64_t Entry{0};		uint64_t Entry{0};
};		};

/// Various thresholds and options controlling the behavior of the profile		/// Various thresholds and options controlling the behavior of the profile
/// inference algorithm. Default values are tuned for several large-scale		/// inference algorithm. Default values are tuned for several large-scale
/// applications, and can be modified via corresponding command-line flags.		/// applications, and can be modified via corresponding command-line flags.
Show All 20 Lines	struct ProfiParams {
unsigned CostBlockEntryInc{0};		unsigned CostBlockEntryInc{0};

/// The cost of decreasing the entry block's count by one.		/// The cost of decreasing the entry block's count by one.
unsigned CostBlockEntryDec{0};		unsigned CostBlockEntryDec{0};

/// The cost of increasing an unknown block's count by one.		/// The cost of increasing an unknown block's count by one.
unsigned CostBlockUnknownInc{0};		unsigned CostBlockUnknownInc{0};

		/// The cost of increasing a jump's count by one.
		unsigned CostJumpInc{0};

		/// The cost of increasing a fall-through jump's count by one.
		unsigned CostJumpFTInc{0};

		/// The cost of decreasing a jump's count by one.
		unsigned CostJumpDec{0};

		/// The cost of decreasing a fall-through jump's count by one.
		unsigned CostJumpFTDec{0};

		/// The cost of increasing an unknown jump's count by one.
		unsigned CostJumpUnknownInc{0};

		/// The cost of increasing an unknown fall-through jump's count by one.
		unsigned CostJumpUnknownFTInc{0};

/// The cost of taking an unlikely block/jump.		/// The cost of taking an unlikely block/jump.
const int64_t CostUnlikely = ((int64_t)1) << 30;		const int64_t CostUnlikely = ((int64_t)1) << 30;
};		};

void applyFlowInference(const ProfiParams &Params, FlowFunction &Func);		void applyFlowInference(const ProfiParams &Params, FlowFunction &Func);
void applyFlowInference(FlowFunction &Func);		void applyFlowInference(FlowFunction &Func);

/// Sample profile inference pass.		/// Sample profile inference pass.
Show All 10 Lines	public:
SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,		SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
BlockWeightMap &SampleBlockWeights)		BlockWeightMap &SampleBlockWeights)
: F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}		: F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}

/// Apply the profile inference algorithm for a given function		/// Apply the profile inference algorithm for a given function
void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);		void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);

private:		private:
		/// Initialize flow function blocks, jumps and misc metadata.
		void initFunction(FlowFunction &Func,
		const std::vector<const BasicBlockT *> &BasicBlocks,
		DenseMap<const BasicBlockT *, uint64_t> &BlockIndex);

/// Try to infer branch probabilities mimicking implementation of		/// Try to infer branch probabilities mimicking implementation of
/// BranchProbabilityInfo. Unlikely taken branches are marked so that the		/// BranchProbabilityInfo. Unlikely taken branches are marked so that the
/// inference algorithm can avoid sending flow along corresponding edges.		/// inference algorithm can avoid sending flow along corresponding edges.
void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks,		void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks,
BlockEdgeMap &Successors, FlowFunction &Func);		BlockEdgeMap &Successors, FlowFunction &Func);

/// Determine whether the block is an exit in the CFG.		/// Determine whether the block is an exit in the CFG.
bool isExit(const BasicBlockT *BB);		bool isExit(const BasicBlockT *BB);
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
}		}
// Quit early for functions with a single block or ones w/o samples		// Quit early for functions with a single block or ones w/o samples
if (BasicBlocks.size() <= 1 \|\| !HasSamples) {		if (BasicBlocks.size() <= 1 \|\| !HasSamples) {
return;		return;
}		}

// Create necessary objects		// Create necessary objects
FlowFunction Func;		FlowFunction Func;
		initFunction(Func, BasicBlocks, BlockIndex);

		// Create and apply the inference network model.
		applyFlowInference(Func);

		// Extract the resulting weights from the control flow
		// All weights are increased by one to avoid propagation errors introduced by
		// zero weights.
		for (const auto *BB : BasicBlocks) {
		BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow;
		}
		for (auto &Jump : Func.Jumps) {
		Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]);
		EdgeWeights[E] = Jump.Flow;
		}

		#ifndef NDEBUG
		// Unreachable blocks and edges should not have a weight.
		for (auto &I : BlockWeights) {
		assert(Reachable.contains(I.first));
		assert(InverseReachable.contains(I.first));
		}
		for (auto &I : EdgeWeights) {
		assert(Reachable.contains(I.first.first) &&
		Reachable.contains(I.first.second));
		assert(InverseReachable.contains(I.first.first) &&
		InverseReachable.contains(I.first.second));
		}
		#endif
		}

		template <typename BT>
		void SampleProfileInference<BT>::initFunction(
		FlowFunction &Func, const std::vector<const BasicBlockT *> &BasicBlocks,
		DenseMap<const BasicBlockT *, uint64_t> &BlockIndex) {
Func.Blocks.reserve(BasicBlocks.size());		Func.Blocks.reserve(BasicBlocks.size());
// Create FlowBlocks		// Create FlowBlocks
for (const auto *BB : BasicBlocks) {		for (const auto *BB : BasicBlocks) {
FlowBlock Block;		FlowBlock Block;
if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {		if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {
Block.HasUnknownWeight = false;		Block.HasUnknownWeight = false;
Block.Weight = SampleBlockWeights[BB];		Block.Weight = SampleBlockWeights[BB];
} else {		} else {
Block.HasUnknownWeight = true;		Block.HasUnknownWeight = true;
Block.Weight = 0;		Block.Weight = 0;
}		}
Block.Index = Func.Blocks.size();		Block.Index = Func.Blocks.size();
Func.Blocks.push_back(Block);		Func.Blocks.push_back(Block);
}		}
// Create FlowEdges		// Create FlowEdges
for (const auto *BB : BasicBlocks) {		for (const auto *BB : BasicBlocks) {
for (auto *Succ : Successors[BB]) {		for (auto *Succ : Successors[BB]) {
if (!BlockIndex.count(Succ))		if (!BlockIndex.count(Succ))
continue;		continue;
FlowJump Jump;		FlowJump Jump;
Jump.Source = BlockIndex[BB];		Jump.Source = BlockIndex[BB];
Jump.Target = BlockIndex[Succ];		Jump.Target = BlockIndex[Succ];
Func.Jumps.push_back(Jump);		Func.Jumps.push_back(Jump);
if (BB == Succ) {
Func.Blocks[BlockIndex[BB]].HasSelfEdge = true;
}
}		}
}		}
for (auto &Jump : Func.Jumps) {		for (auto &Jump : Func.Jumps) {
Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);		uint64_t Src = Jump.Source;
Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);		uint64_t Dst = Jump.Target;
		Func.Blocks[Src].SuccJumps.push_back(&Jump);
		Func.Blocks[Dst].PredJumps.push_back(&Jump);
}		}

// Try to infer probabilities of jumps based on the content of basic block		// Try to infer probabilities of jumps based on the content of basic block
findUnlikelyJumps(BasicBlocks, Successors, Func);		findUnlikelyJumps(BasicBlocks, Successors, Func);

// Find the entry block		// Find the entry block
for (size_t I = 0; I < Func.Blocks.size(); I++) {		for (size_t I = 0; I < Func.Blocks.size(); I++) {
if (Func.Blocks[I].isEntry()) {		if (Func.Blocks[I].isEntry()) {
Func.Entry = I;		Func.Entry = I;
break;		break;
}		}
}		}
		assert(Func.Entry == 0 && "incorrect index of the entry block");

// Create and apply the inference network model.		// Pre-process data: make sure the entry weight is at least 1
applyFlowInference(Func);		auto &EntryBlock = Func.Blocks[Func.Entry];
		if (EntryBlock.Weight == 0 && !EntryBlock.HasUnknownWeight) {
// Extract the resulting weights from the control flow		EntryBlock.Weight = 1;
// All weights are increased by one to avoid propagation errors introduced by		EntryBlock.HasUnknownWeight = false;
// zero weights.
for (const auto *BB : BasicBlocks) {
BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow;
}		}
for (auto &Jump : Func.Jumps) {
Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]);
EdgeWeights[E] = Jump.Flow;
}

#ifndef NDEBUG
// Unreachable blocks and edges should not have a weight.
for (auto &I : BlockWeights) {
assert(Reachable.contains(I.first));
assert(InverseReachable.contains(I.first));
}
for (auto &I : EdgeWeights) {
assert(Reachable.contains(I.first.first) &&
Reachable.contains(I.first.second));
assert(InverseReachable.contains(I.first.first) &&
InverseReachable.contains(I.first.second));
}
#endif
}		}

template <typename BT>		template <typename BT>
inline void SampleProfileInference<BT>::findUnlikelyJumps(		inline void SampleProfileInference<BT>::findUnlikelyJumps(
const std::vector<const BasicBlockT *> &BasicBlocks,		const std::vector<const BasicBlockT *> &BasicBlocks,
BlockEdgeMap &Successors, FlowFunction &Func) {}		BlockEdgeMap &Successors, FlowFunction &Func) {}

template <>		template <>
Show All 36 Lines

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());		Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
if (Params.EvenFlowDistribution)		if (Params.EvenFlowDistribution)
AugmentingEdges =		AugmentingEdges =
std::vector<std::vector<Edge >>(NodeCount, std::vector<Edge >());		std::vector<std::vector<Edge >>(NodeCount, std::vector<Edge >());
}		}

// Run the algorithm.		// Run the algorithm.
int64_t run() {		int64_t run() {
		LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

// Iteratively find an augmentation path/dag in the network and send the		// Iteratively find an augmentation path/dag in the network and send the
// flow along its edges		// flow along its edges
size_t AugmentationIters = applyFlowAugmentation();		size_t AugmentationIters = applyFlowAugmentation();

// Compute the total flow and its cost		// Compute the total flow and its cost
int64_t TotalCost = 0;		int64_t TotalCost = 0;
int64_t TotalFlow = 0;		int64_t TotalFlow = 0;
for (uint64_t Src = 0; Src < Nodes.size(); Src++) {		for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
▲ Show 20 Lines • Show All 462 Lines • ▼ Show 20 Lines	private:
/// Target (sink) node of the flow.		/// Target (sink) node of the flow.
uint64_t Target;		uint64_t Target;
/// Augmenting edges.		/// Augmenting edges.
std::vector<std::vector<Edge *>> AugmentingEdges;		std::vector<std::vector<Edge *>> AugmentingEdges;
/// Params for flow computation.		/// Params for flow computation.
const ProfiParams &Params;		const ProfiParams &Params;
};		};

/// A post-processing adjustment of control flow. It applies two steps by		/// A post-processing adjustment of the control flow. It applies two steps by
/// rerouting some flow and making it more realistic:		/// rerouting some flow and making it more realistic:
///		///
/// - First, it removes all isolated components ("islands") with a positive flow		/// - First, it removes all isolated components ("islands") with a positive flow
/// that are unreachable from the entry block. For every such component, we		/// that are unreachable from the entry block. For every such component, we
/// find the shortest path from the entry to an exit passing through the component,		/// find the shortest path from the entry to an exit passing through the component,
/// and increase the flow by one unit along the path.		/// and increase the flow by one unit along the path.
///		///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks		/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
/// with no sampled counts. Then it rebalances the flow that goes through such		/// with no sampled counts. Then it rebalances the flow that goes through such
/// a subgraph so that each branch is taken with probability 50%.		/// a subgraph so that each branch is taken with probability 50%.
/// An unknown subgraph is such that for every two nodes u and v:		/// An unknown subgraph is such that for every two nodes u and v:
/// - u dominates v and u is not unknown;		/// - u dominates v and u is not unknown;
/// - v post-dominates u; and		/// - v post-dominates u; and
/// - all inner-nodes of all (u,v)-paths are unknown.		/// - all inner-nodes of all (u,v)-paths are unknown.
///		///
class FlowAdjuster {		class FlowAdjuster {
public:		public:
FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)		FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
: Params(Params), Func(Func) {		: Params(Params), Func(Func) {}
assert(Func.Blocks[Func.Entry].isEntry() &&
"incorrect index of the entry block");
}

// Run the post-processing		/// Apply the post-processing.
void run() {		void run() {
if (Params.JoinIslands) {		if (Params.JoinIslands) {
/// Adjust the flow to get rid of isolated components.		// Adjust the flow to get rid of isolated components
joinIsolatedComponents();		joinIsolatedComponents();
}		}

if (Params.RebalanceUnknown) {		if (Params.RebalanceUnknown) {
/// Rebalance the flow inside unknown subgraphs.		// Rebalance the flow inside unknown subgraphs
rebalanceUnknownSubgraphs();		rebalanceUnknownSubgraphs();
}		}
}		}

private:		private:
void joinIsolatedComponents() {		void joinIsolatedComponents() {
// Find blocks that are reachable from the source		// Find blocks that are reachable from the source
auto Visited = BitVector(NumBlocks(), false);		auto Visited = BitVector(NumBlocks(), false);
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines	private:
/// - to minimize the number of unlikely jumps used and subject to that,		/// - to minimize the number of unlikely jumps used and subject to that,
/// - to minimize the number of Flow == 0 jumps used and subject to that,		/// - to minimize the number of Flow == 0 jumps used and subject to that,
/// - minimizes total multiplicative Flow increase for the remaining edges.		/// - minimizes total multiplicative Flow increase for the remaining edges.
/// To capture this objective with integer distances, we round off fractional		/// To capture this objective with integer distances, we round off fractional
/// parts to a multiple of 1 / BaseDistance.		/// parts to a multiple of 1 / BaseDistance.
int64_t jumpDistance(FlowJump *Jump) const {		int64_t jumpDistance(FlowJump *Jump) const {
if (Jump->IsUnlikely)		if (Jump->IsUnlikely)
return Params.CostUnlikely;		return Params.CostUnlikely;

uint64_t BaseDistance =		uint64_t BaseDistance =
std::max(FlowAdjuster::MinBaseDistance,		std::max(FlowAdjuster::MinBaseDistance,
std::min(Func.Blocks[Func.Entry].Flow,		std::min(Func.Blocks[Func.Entry].Flow,
Params.CostUnlikely / NumBlocks()));		Params.CostUnlikely / (2 * (NumBlocks() + 1))));
if (Jump->Flow > 0)		if (Jump->Flow > 0)
return BaseDistance + BaseDistance / Jump->Flow;		return BaseDistance + BaseDistance / Jump->Flow;
return BaseDistance * NumBlocks();		return 2 * BaseDistance * (NumBlocks() + 1);
};		};

uint64_t NumBlocks() const { return Func.Blocks.size(); }		uint64_t NumBlocks() const { return Func.Blocks.size(); }

/// Rebalance unknown subgraphs so that the flow is split evenly across the		/// Rebalance unknown subgraphs so that the flow is split evenly across the
/// outgoing branches of every block of the subgraph. The method iterates over		/// outgoing branches of every block of the subgraph. The method iterates over
/// blocks with known weight and identifies unknown subgraphs rooted at the		/// blocks with known weight and identifies unknown subgraphs rooted at the
/// blocks. Then it verifies if flow rebalancing is feasible and applies it.		/// blocks. Then it verifies if flow rebalancing is feasible and applies it.
▲ Show 20 Lines • Show All 267 Lines • ▼ Show 20 Lines	private:
static constexpr uint64_t MinBaseDistance = 10000;		static constexpr uint64_t MinBaseDistance = 10000;

/// Params for flow computation.		/// Params for flow computation.
const ProfiParams &Params;		const ProfiParams &Params;
/// The function.		/// The function.
FlowFunction &Func;		FlowFunction &Func;
};		};

		std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
		const FlowBlock &Block);
		std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
		const FlowJump &Jump);

/// Initializing flow network for a given function.		/// Initializing flow network for a given function.
///		///
/// Every block is split into three nodes that are responsible for (i) an		/// Every block is split into two nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or		/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.		/// reduction of the block weight.
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,		void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
FlowFunction &Func) {		FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();		uint64_t NumBlocks = Func.Blocks.size();
assert(NumBlocks > 1 && "Too few blocks in a function");		assert(NumBlocks > 1 && "Too few blocks in a function");
LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");		uint64_t NumJumps = Func.Jumps.size();
		assert(NumJumps > 0 && "Too few jumps in a function");

// Pre-process data: make sure the entry weight is at least 1
if (Func.Blocks[Func.Entry].Weight == 0) {
Func.Blocks[Func.Entry].Weight = 1;
}
// Introducing dummy source/sink pairs to allow flow circulation.		// Introducing dummy source/sink pairs to allow flow circulation.
// The nodes corresponding to blocks of Func have indices in the range		// The nodes corresponding to blocks of the function have indices in
// [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.		// the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
uint64_t S = 3 * NumBlocks;		// next four values.
		uint64_t S = 2 * NumBlocks;
uint64_t T = S + 1;		uint64_t T = S + 1;
uint64_t S1 = S + 2;		uint64_t S1 = S + 2;
uint64_t T1 = S + 3;		uint64_t T1 = S + 3;

Network.initialize(3 * NumBlocks + 4, S1, T1);		Network.initialize(2 * NumBlocks + 4, S1, T1);

// Create three nodes for every block of the function		// Initialize nodes of the flow network
for (uint64_t B = 0; B < NumBlocks; B++) {		for (uint64_t B = 0; B < NumBlocks; B++) {
auto &Block = Func.Blocks[B];		auto &Block = Func.Blocks[B];
assert((!Block.HasUnknownWeight \|\| Block.Weight == 0 \|\| Block.isEntry()) &&
"non-zero weight of a block w/o weight except for an entry");

// Split every block into two nodes		// Split every block into two auxiliary nodes to allow
uint64_t Bin = 3 * B;		// increase/reduction of the block count.
uint64_t Bout = 3 * B + 1;		uint64_t Bin = 2 * B;
uint64_t Baux = 3 * B + 2;		uint64_t Bout = 2 * B + 1;
if (Block.Weight > 0) {
Network.addEdge(S1, Bout, Block.Weight, 0);
Network.addEdge(Bin, T1, Block.Weight, 0);
}

// Edges from S and to T		// Edges from S and to T
assert((!Block.isEntry() \|\| !Block.isExit()) &&
hoyUnsubmitted Not Done Reply Inline Actions Does this still hold? hoy: Does this still hold?
spupyrevAuthorUnsubmitted Done Reply Inline Actions the assert is moved to `verifyInput` spupyrev: the assert is moved to `verifyInput`
"a block cannot be an entry and an exit");
if (Block.isEntry()) {		if (Block.isEntry()) {
Network.addEdge(S, Bin, 0);		Network.addEdge(S, Bin, 0);
} else if (Block.isExit()) {		} else if (Block.isExit()) {
Network.addEdge(Bout, T, 0);		Network.addEdge(Bout, T, 0);
}		}

// An auxiliary node to allow increase/reduction of block counts:		// Assign costs for increasing/decreasing the block counts
// We assume that decreasing block counts is more expensive than increasing,		auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
// and thus, setting separate costs here. In the future we may want to tune
// the relative costs so as to maximize the quality of generated profiles.		// Add the corresponding edges to the network
int64_t AuxCostInc = Params.CostBlockInc;		Network.addEdge(Bin, Bout, AuxCostInc);
int64_t AuxCostDec = Params.CostBlockDec;		if (Block.Weight > 0) {
		Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
		Network.addEdge(S1, Bout, Block.Weight, 0);
		Network.addEdge(Bin, T1, Block.Weight, 0);
		}
		}

		// Initialize edges of the flow network
		for (uint64_t J = 0; J < NumJumps; J++) {
		auto &Jump = Func.Jumps[J];

		// Get the endpoints corresponding to the jump
		uint64_t Jin = 2 * Jump.Source + 1;
		uint64_t Jout = 2 * Jump.Target;

		// Assign costs for increasing/decreasing the jump counts
		auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

		// Add the corresponding edges to the network
		Network.addEdge(Jin, Jout, AuxCostInc);
		if (Jump.Weight > 0) {
		Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
		Network.addEdge(S1, Jout, Jump.Weight, 0);
		Network.addEdge(Jin, T1, Jump.Weight, 0);
		}
		}

		// Make sure we have a valid flow circulation
		Network.addEdge(T, S, 0);
		}

		/// Assign costs for increasing/decreasing the block counts.
		std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
		const FlowBlock &Block) {
		// Modifying the weight of an unlikely block is expensive
		if (Block.IsUnlikely)
		return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

		// Assign default values for the costs
		int64_t CostInc = Params.CostBlockInc;
		int64_t CostDec = Params.CostBlockDec;
		// Update the costs depending on the block metadata
if (Block.HasUnknownWeight) {		if (Block.HasUnknownWeight) {
// Do not penalize changing weights of blocks w/o known profile count		CostInc = Params.CostBlockUnknownInc;
AuxCostInc = Params.CostBlockUnknownInc;		CostDec = 0;
AuxCostDec = 0;
} else {		} else {
// Increasing the count for "cold" blocks with zero initial count is more		// Increasing the count for "cold" blocks with zero initial count is more
// expensive than for "hot" ones		// expensive than for "hot" ones
if (Block.Weight == 0) {		if (Block.Weight == 0)
AuxCostInc = Params.CostBlockZeroInc;		CostInc = Params.CostBlockZeroInc;
}
// Modifying the count of the entry block is expensive		// Modifying the count of the entry block is expensive
if (Block.isEntry()) {		if (Block.isEntry()) {
AuxCostInc = Params.CostBlockEntryInc;		CostInc = Params.CostBlockEntryInc;
AuxCostDec = Params.CostBlockEntryDec;		CostDec = Params.CostBlockEntryDec;
}		}
}		}
// For blocks with self-edges, do not penalize a reduction of the count,		return std::make_pair(CostInc, CostDec);
// as all of the increase can be attributed to the self-edge
if (Block.HasSelfEdge) {
hoyUnsubmitted Not Done Reply Inline Actions Where is this handled in the new implementation? hoy: Where is this handled in the new implementation?
spupyrevAuthorUnsubmitted Done Reply Inline Actions This is a good question; we don't have this condition anymore. While in theory there might be a difference, I do not see a single instance (in my benchmark with 10K functions) where this statement yields a different result. So it must be quite rare. More importantly, I do not remember the original motivation: Why is it "correct" not to penalize blocks with self loops? spupyrev: This is a good question; we don't have this condition anymore. While in theory there might be a…
hoyUnsubmitted Not Done Reply Inline Actions Oh, it might be related to a Skylake-specific hardware issue where a LBR entry can occur consecutively redundantly. Decreasing the backedge count may make sense. Otherwise I cannot think of why self-loop is special from normal loops. The Skylake issue is worked around in the profile generation time so we should be free from that now. hoy: Oh, it might be related to a Skylake-specific hardware issue where a LBR entry can occur…
AuxCostDec = 0;
}		}

Network.addEdge(Bin, Baux, AuxCostInc);		/// Assign costs for increasing/decreasing the jump counts.
Network.addEdge(Baux, Bout, AuxCostInc);		std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
hoyUnsubmitted Not Done Reply Inline Actions Previously the cost it takes to go from `Bin` to `Bout` is `2*AuxCostInc`. Now without `Baux` the cost for the same path becomes `AuxCostInc`. Is it a discrepancy or am I missing anything? hoy: Previously the cost it takes to go from `Bin` to `Bout` is `2*AuxCostInc`. Now without `Baux`…
spupyrevAuthorUnsubmitted Done Reply Inline Actions Now all the costs are reduced by a factor of 2, that is, instead of `2*AuxCostInc` we have `AuxCostInc` and instead of `2*AuxCostDec` we have `AuxCostDec`. This does not have any impact on the algorithm and the produced solution; only the objective is (uniformly) scaled. spupyrev: Now all the costs are reduced by a factor of 2, that is, instead of `2*AuxCostInc` we have…
if (Block.Weight > 0) {		const FlowJump &Jump) {
Network.addEdge(Bout, Baux, AuxCostDec);		// Modifying the weight of an unlikely jump is expensive
Network.addEdge(Baux, Bin, AuxCostDec);		if (Jump.IsUnlikely)
}		return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
}
		// Assign default values for the costs
// Creating edges for every jump		int64_t CostInc = Params.CostJumpInc;
for (auto &Jump : Func.Jumps) {		int64_t CostDec = Params.CostJumpDec;
uint64_t Src = Jump.Source;		// Update the costs depending on the block metadata
uint64_t Dst = Jump.Target;		if (Jump.Source + 1 == Jump.Target) {
		hoyUnsubmitted Done Reply Inline Actions Add a comment to indicate the check is for fall-through branches? hoy: Add a comment to indicate the check is for fall-through branches?
if (Src != Dst) {		// Adjusting the fall-through branch
uint64_t SrcOut = 3 * Src + 1;		CostInc = Params.CostJumpFTInc;
uint64_t DstIn = 3 * Dst;		CostDec = Params.CostJumpFTDec;
uint64_t Cost = Jump.IsUnlikely ? Params.CostUnlikely : 0;		}
Network.addEdge(SrcOut, DstIn, Cost);		if (Jump.HasUnknownWeight) {
}		// The cost is different for fall-through and non-fall-through branches
		if (Jump.Source + 1 == Jump.Target)
		CostInc = Params.CostJumpUnknownFTInc;
		else
		CostInc = Params.CostJumpUnknownInc;
		CostDec = 0;
		} else {
		assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
}		}
		return std::make_pair(CostInc, CostDec);
// Make sure we have a valid flow circulation
Network.addEdge(T, S, 0);
}		}

/// Extract resulting block and edge counts from the flow network.		/// Extract resulting block and edge counts from the flow network.
void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {		void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
		FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();		uint64_t NumBlocks = Func.Blocks.size();
		uint64_t NumJumps = Func.Jumps.size();

		// Extract resulting jump counts
		for (uint64_t J = 0; J < NumJumps; J++) {
		auto &Jump = Func.Jumps[J];
		uint64_t SrcOut = 2 * Jump.Source + 1;
		uint64_t DstIn = 2 * Jump.Target;

// Extract resulting block counts
for (uint64_t Src = 0; Src < NumBlocks; Src++) {
auto &Block = Func.Blocks[Src];
uint64_t SrcOut = 3 * Src + 1;
int64_t Flow = 0;		int64_t Flow = 0;
for (const auto &Adj : Network.getFlow(SrcOut)) {		int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
uint64_t DstIn = Adj.first;		if (Jump.Source != Jump.Target)
int64_t DstFlow = Adj.second;		Flow = int64_t(Jump.Weight) + AuxFlow;
bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);		else
if (!IsAuxNode \|\| Block.HasSelfEdge) {		Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
Flow += DstFlow;
}		Jump.Flow = Flow;
}		assert(Flow >= 0 && "negative jump flow");
Block.Flow = Flow;
assert(Flow >= 0 && "negative block flow");
}		}

// Extract resulting jump counts		// Extract resulting block counts
		auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
		auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
for (auto &Jump : Func.Jumps) {		for (auto &Jump : Func.Jumps) {
uint64_t Src = Jump.Source;		InFlow[Jump.Target] += Jump.Flow;
uint64_t Dst = Jump.Target;		OutFlow[Jump.Source] += Jump.Flow;
int64_t Flow = 0;
if (Src != Dst) {
uint64_t SrcOut = 3 * Src + 1;
uint64_t DstIn = 3 * Dst;
Flow = Network.getFlow(SrcOut, DstIn);
} else {
uint64_t SrcOut = 3 * Src + 1;
uint64_t SrcAux = 3 * Src + 2;
int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
if (AuxFlow > 0)
Flow = AuxFlow;
}		}
Jump.Flow = Flow;		for (uint64_t B = 0; B < NumBlocks; B++) {
assert(Flow >= 0 && "negative jump flow");		auto &Block = Func.Blocks[B];
		Block.Flow = std::max(OutFlow[B], InFlow[B]);
}		}
}		}

#ifndef NDEBUG		#ifndef NDEBUG
/// Verify that the computed flow values satisfy flow conservation rules		/// Verify that the provided block/jump weights are as expected.
void verifyWeights(const FlowFunction &Func) {		void verifyInput(const FlowFunction &Func) {
		// Verify the entry block
		assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
		for (size_t I = 1; I < Func.Blocks.size(); I++) {
		assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
		hoyUnsubmitted Done Reply Inline Actions Please add a message for the assert and the one right below. hoy: Please add a message for the assert and the one right below.
		}
		// Verify CFG jumps
		for (auto &Block : Func.Blocks) {
		assert((!Block.isEntry() \|\| !Block.isExit()) &&
		"a block cannot be an entry and an exit");
		}
		// Verify input block weights
		for (auto &Block : Func.Blocks) {
		assert((!Block.HasUnknownWeight \|\| Block.Weight == 0 \|\| Block.isEntry()) &&
		"non-zero weight of a block w/o weight except for an entry");
		}
		// Verify input jump weights
		for (auto &Jump : Func.Jumps) {
		assert((!Jump.HasUnknownWeight \|\| Jump.Weight == 0) &&
		"non-zero weight of a jump w/o weight");
		}
		}

		/// Verify that the computed flow values satisfy flow conservation rules.
		void verifyOutput(const FlowFunction &Func) {
const uint64_t NumBlocks = Func.Blocks.size();		const uint64_t NumBlocks = Func.Blocks.size();
auto InFlow = std::vector<uint64_t>(NumBlocks, 0);		auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);		auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
for (const auto &Jump : Func.Jumps) {		for (const auto &Jump : Func.Jumps) {
InFlow[Jump.Target] += Jump.Flow;		InFlow[Jump.Target] += Jump.Flow;
OutFlow[Jump.Source] += Jump.Flow;		OutFlow[Jump.Source] += Jump.Flow;
}		}

▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	for (uint64_t I = 0; I < NumBlocks; I++) {
auto &Block = Func.Blocks[I];		auto &Block = Func.Blocks[I];
assert((Visited[I] \|\| Block.Flow == 0) && "an isolated flow component");		assert((Visited[I] \|\| Block.Flow == 0) && "an isolated flow component");
}		}
}		}
#endif		#endif

} // end of anonymous namespace		} // end of anonymous namespace

/// Apply the profile inference algorithm for a given flow function		/// Apply the profile inference algorithm for a given function
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {		void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
		#ifndef NDEBUG
		// Verify the input data
		verifyInput(Func);
		#endif

// Create and apply an inference network model		// Create and apply an inference network model
auto InferenceNetwork = MinCostMaxFlow(Params);		auto InferenceNetwork = MinCostMaxFlow(Params);
initializeNetwork(Params, InferenceNetwork, Func);		initializeNetwork(Params, InferenceNetwork, Func);
InferenceNetwork.run();		InferenceNetwork.run();

// Extract flow values for every block and every edge		// Extract flow values for every block and every edge
extractWeights(InferenceNetwork, Func);		extractWeights(Params, InferenceNetwork, Func);

// Post-processing adjustments to the flow		// Post-processing adjustments to the flow
auto Adjuster = FlowAdjuster(Params, Func);		auto Adjuster = FlowAdjuster(Params, Func);
Adjuster.run();		Adjuster.run();

#ifndef NDEBUG		#ifndef NDEBUG
// Verify the result		// Verify the result
verifyWeights(Func);		verifyOutput(Func);
#endif		#endif
}		}

/// Apply the profile inference algorithm for a given flow function		/// Apply the profile inference algorithm for a given flow function
void llvm::applyFlowInference(FlowFunction &Func) {		void llvm::applyFlowInference(FlowFunction &Func) {
ProfiParams Params;		ProfiParams Params;
// Set the params from the command-line flags.		// Set the params from the command-line flags.
Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;		Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
Show All 11 Lines

llvm/test/Transforms/SampleProfile/profile-context-tracker.ll

	Show First 20 Lines • Show All 139 Lines • ▼ Show 20 Lines
	; INLINE-ALL-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}			; INLINE-ALL-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}
	; INLINE-ALL-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}			; INLINE-ALL-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}

	; INLINE-HOT-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 1}			; INLINE-HOT-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 1}
	; INLINE-HOT-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 12}			; INLINE-HOT-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 12}
	; INLINE-HOT-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}			; INLINE-HOT-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}
	; INLINE-HOT-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}			; INLINE-HOT-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}

	; INLINE-NONE: [[MAIN_PROF]] = !{!"function_entry_count", i64 13}			; INLINE-NONE: [[MAIN_PROF]] = !{!"function_entry_count", i64 14}
	; INLINE-NONE: [[FUNCA_PROF]] = !{!"function_entry_count", i64 24}			; INLINE-NONE: [[FUNCA_PROF]] = !{!"function_entry_count", i64 24}
	; INLINE-NONE-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 21}			; INLINE-NONE-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 21}
	; INLINE-NONE-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 32}			; INLINE-NONE-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 32}

	declare i32 @_Z3fibi(i32)			declare i32 @_Z3fibi(i32)

	attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }			attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
	attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }			attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
	▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[BOLT] using jump weights in profi
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 488388

llvm/include/llvm/Transforms/Utils/SampleProfileInference.h

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

llvm/test/Transforms/SampleProfile/profile-context-tracker.ll

This is an archive of the discontinued LLVM Phabricator instance.

[BOLT] using jump weights in profi
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 488388

llvm/include/llvm/Transforms/Utils/SampleProfileInference.h

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

llvm/test/Transforms/SampleProfile/profile-context-tracker.ll

[BOLT] using jump weights in profi
ClosedPublic