Diff 400917

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

Show First 20 Lines • Show All 139 Lines • ▼ Show 20 Lines	public:
static constexpr int64_t AuxCostDec = 20;		static constexpr int64_t AuxCostDec = 20;
/// A cost of increasing a count of zero-weight block by one.		/// A cost of increasing a count of zero-weight block by one.
static constexpr int64_t AuxCostIncZero = 11;		static constexpr int64_t AuxCostIncZero = 11;
/// A cost of increasing the entry block's count by one.		/// A cost of increasing the entry block's count by one.
static constexpr int64_t AuxCostIncEntry = 40;		static constexpr int64_t AuxCostIncEntry = 40;
/// A cost of decreasing the entry block's count by one.		/// A cost of decreasing the entry block's count by one.
static constexpr int64_t AuxCostDecEntry = 10;		static constexpr int64_t AuxCostDecEntry = 10;
/// A cost of taking an unlikely jump.		/// A cost of taking an unlikely jump.
static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;		static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
		hoyUnsubmitted Done Reply Inline Actions How did this value come up? Have you tried other values between 20 and 30? hoy: How did this value come up? Have you tried other values between 20 and 30?
		spupyrevAuthorUnsubmitted Done Reply Inline Actions This is supposed to be an "infinity" value. I noticed however that 2^20 isn't large enough for some extreme cases (huge CFGs with thousand of basic blocks and very hot loops). Hence, increasing the value to 2^30. spupyrev: This is supposed to be an "infinity" value. I noticed however that 2^20 isn't large enough for…

private:		private:
/// Check for existence of an augmenting path with a positive capacity.		/// Check for existence of an augmenting path with a positive capacity.
bool findAugmentingPath() {		bool findAugmentingPath() {
// Initialize data structures		// Initialize data structures
for (auto &Node : Nodes) {		for (auto &Node : Nodes) {
Node.Distance = INF;		Node.Distance = INF;
Node.ParentNode = uint64_t(-1);		Node.ParentNode = uint64_t(-1);
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	while (Now != Source) {

Edge.Flow += PathCapacity;		Edge.Flow += PathCapacity;
RevEdge.Flow -= PathCapacity;		RevEdge.Flow -= PathCapacity;

Now = Pred;		Now = Pred;
}		}
}		}

/// An node in a flow network.		/// A node in a flow network.
struct Node {		struct Node {
/// The cost of the cheapest path from the source to the current node.		/// The cost of the cheapest path from the source to the current node.
int64_t Distance;		int64_t Distance;
/// The node preceding the current one in the path.		/// The node preceding the current one in the path.
uint64_t ParentNode;		uint64_t ParentNode;
/// The index of the edge between ParentNode and the current node.		/// The index of the edge between ParentNode and the current node.
uint64_t ParentEdgeIndex;		uint64_t ParentEdgeIndex;
/// An indicator of whether the current node is in a queue.		/// An indicator of whether the current node is in a queue.
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines	public:
void run() {		void run() {
/// Adjust the flow to get rid of isolated components.		/// Adjust the flow to get rid of isolated components.
joinIsolatedComponents();		joinIsolatedComponents();

/// Rebalance the flow inside unknown subgraphs.		/// Rebalance the flow inside unknown subgraphs.
rebalanceUnknownSubgraphs();		rebalanceUnknownSubgraphs();
}		}

/// The probability for the first successor of a unknown subgraph
static constexpr double UnknownFirstSuccProbability = 0.5;

private:		private:
void joinIsolatedComponents() {		void joinIsolatedComponents() {
// Find blocks that are reachable from the source		// Find blocks that are reachable from the source
auto Visited = BitVector(NumBlocks(), false);		auto Visited = BitVector(NumBlocks(), false);
findReachable(Func.Entry, Visited);		findReachable(Func.Entry, Visited);

// Iterate over all non-reachable blocks and adjust their weights		// Iterate over all non-reachable blocks and adjust their weights
for (uint64_t I = 0; I < NumBlocks(); I++) {		for (uint64_t I = 0; I < NumBlocks(); I++) {
▲ Show 20 Lines • Show All 130 Lines • ▼ Show 20 Lines	if (Jump->Flow > 0)
return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);		return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
if (Func.Blocks[Jump->Target].Weight > 0)		if (Func.Blocks[Jump->Target].Weight > 0)
return BaseDistance;		return BaseDistance;
return BaseDistance * (NumBlocks() + 1);		return BaseDistance * (NumBlocks() + 1);
};		};

uint64_t NumBlocks() const { return Func.Blocks.size(); }		uint64_t NumBlocks() const { return Func.Blocks.size(); }

/// Rebalance unknown subgraphs so as each branch splits with probabilities		/// Rebalance unknown subgraphs so that the flow is split evenly across the
/// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability		/// outgoing branches of every block of the subgraph. The method iterates over
		/// blocks with known weight and identifies unknown subgraphs rooted at the
		/// blocks. Then it verifies if flow rebalancing is feasible and applies it.
void rebalanceUnknownSubgraphs() {		void rebalanceUnknownSubgraphs() {
static_assert(		// Try to find unknown subgraphs from each block
UnknownFirstSuccProbability >= 0.0 &&
UnknownFirstSuccProbability <= 1.0,
"the share of the unknown successor should be between 0 and 1");
// Try to find unknown subgraphs from each non-unknown block
for (uint64_t I = 0; I < Func.Blocks.size(); I++) {		for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
auto SrcBlock = &Func.Blocks[I];		auto SrcBlock = &Func.Blocks[I];
// Do not attempt to find unknown successors from a unknown or a		// Verify if rebalancing rooted at SrcBlock is feasible
// zero-flow block		if (!canRebalanceAtRoot(SrcBlock))
if (SrcBlock->UnknownWeight \|\| SrcBlock->Flow == 0)
continue;		continue;

std::vector<FlowBlock *> UnknownSuccs;		// Find an unknown subgraphs starting at SrcBlock. Along the way,
		// fill in known destinations and intermediate unknown blocks.
		std::vector<FlowBlock *> UnknownBlocks;
		std::vector<FlowBlock *> KnownDstBlocks;
		findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);

		// Verify if rebalancing of the subgraph is feasible. If the search is
		// successful, find the unique destination block (which can be null)
FlowBlock *DstBlock = nullptr;		FlowBlock *DstBlock = nullptr;
// Find a unknown subgraphs starting at block SrcBlock		if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))		DstBlock))
continue;		continue;
// At the moment, we do not rebalance subgraphs containing cycles among
// unknown blocks		// We cannot rebalance subgraphs containing cycles among unknown blocks
if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))		if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
continue;		continue;

// Rebalance the flow		// Rebalance the flow
rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);		rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
}		}
}		}

/// Find a unknown subgraph starting at block SrcBlock.		/// Verify if rebalancing rooted at a given block is possible.
/// If the search is successful, the method sets DstBlock and UnknownSuccs.		bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
bool findUnknownSubgraph(FlowBlock SrcBlock, FlowBlock &DstBlock,		// Do not attempt to find unknown subgraphs from an unknown or a
std::vector<FlowBlock *> &UnknownSuccs) {		// zero-flow block
		if (SrcBlock->UnknownWeight \|\| SrcBlock->Flow == 0)
		return false;

		// Do not attempt to process subgraphs from a block w/o unknown sucessors
		bool HasUnknownSuccs = false;
		for (auto Jump : SrcBlock->SuccJumps) {
		if (Func.Blocks[Jump->Target].UnknownWeight) {
		HasUnknownSuccs = true;
		break;
		}
		}
		if (!HasUnknownSuccs)
		return false;

		return true;
		}

		/// Find an unknown subgraph starting at block SrcBlock. The method sets
		/// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
		void findUnknownSubgraph(const FlowBlock *SrcBlock,
		std::vector<FlowBlock *> &KnownDstBlocks,
		std::vector<FlowBlock *> &UnknownBlocks) {
// Run BFS from SrcBlock and make sure all paths are going through unknown		// Run BFS from SrcBlock and make sure all paths are going through unknown
// blocks and end at a non-unknown DstBlock		// blocks and end at a non-unknown DstBlock
auto Visited = BitVector(NumBlocks(), false);		auto Visited = BitVector(NumBlocks(), false);
std::queue<uint64_t> Queue;		std::queue<uint64_t> Queue;
DstBlock = nullptr;

Queue.push(SrcBlock->Index);		Queue.push(SrcBlock->Index);
Visited[SrcBlock->Index] = true;		Visited[SrcBlock->Index] = true;
while (!Queue.empty()) {		while (!Queue.empty()) {
auto &Block = Func.Blocks[Queue.front()];		auto &Block = Func.Blocks[Queue.front()];
Queue.pop();		Queue.pop();
// Process blocks reachable from Block		// Process blocks reachable from Block
for (auto Jump : Block.SuccJumps) {		for (auto Jump : Block.SuccJumps) {
		// If Jump can be ignored, skip it
		if (ignoreJump(SrcBlock, nullptr, Jump))
		continue;

uint64_t Dst = Jump->Target;		uint64_t Dst = Jump->Target;
		// If Dst has been visited, skip Jump
if (Visited[Dst])		if (Visited[Dst])
continue;		continue;
		// Process block Dst
Visited[Dst] = true;		Visited[Dst] = true;
if (!Func.Blocks[Dst].UnknownWeight) {		if (!Func.Blocks[Dst].UnknownWeight) {
// If we see non-unique non-unknown block reachable from SrcBlock,		KnownDstBlocks.push_back(&Func.Blocks[Dst]);
// stop processing and skip rebalancing
FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
return false;
DstBlock = CandidateDstBlock;
} else {		} else {
Queue.push(Dst);		Queue.push(Dst);
UnknownSuccs.push_back(&Func.Blocks[Dst]);		UnknownBlocks.push_back(&Func.Blocks[Dst]);
		}
}		}
}		}
}		}

		/// Verify if rebalancing of the subgraph is feasible. If the checks are
		/// successful, set the unique destination block, DstBlock (can be null).
		bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
		const std::vector<FlowBlock *> &KnownDstBlocks,
		const std::vector<FlowBlock *> &UnknownBlocks,
		FlowBlock *&DstBlock) {
// If the list of unknown blocks is empty, we don't need rebalancing		// If the list of unknown blocks is empty, we don't need rebalancing
if (UnknownSuccs.empty())		if (UnknownBlocks.empty())
return false;		return false;
// If all reachable nodes from SrcBlock are unknown, skip rebalancing
if (DstBlock == nullptr)		// If there are multiple known sinks, we can't rebalance
		if (KnownDstBlocks.size() > 1)
		hoyUnsubmitted Done Reply Inline Actions Can this be relaxed? Say if the exit block of a subgraph has unknown weight, the block may never be rebalanced. Eg. A / \ B(?) C(?) \| \| D E \ / F (?) Did I understanding correctly? hoy: Can this be relaxed? Say if the exit block of a subgraph has unknown weight, the block may…
		spupyrevAuthorUnsubmitted Done Reply Inline Actions I am not sure I understand this particular example, what needs to be rebalanced here? If A, D, and E have known weights, then a solution is unique, no? But in general you're right that instances with `KnownDstBlocks.size() > 1` won't be rebalanced. We thought about this for a long time but the problem is difficult. spupyrev: I am not sure I understand this particular example, what needs to be rebalanced here? If A, D…
		hoyUnsubmitted Not Done Reply Inline Actions If A, D, and E have known weights, then a solution is unique, no? Say if we are rebalancing between `B` and `C`, due to this check and the fact `KnownDstBlocks` will include `D` and `E`, it'll bail out here? But in general you're right that instances with KnownDstBlocks.size() > 1 won't be rebalanced. We thought about this for a long time but the problem is difficult. Can blocks in `KnownDstBlocks` be wired to a single artificial block which serves as the final only known dst? This is just my random thought. Hopefully the multi known case isn't too common. hoy: > If A, D, and E have known weights, then a solution is unique, no? Say if we are rebalancing…
		spupyrevAuthorUnsubmitted Done Reply Inline Actions Discussed this example offline. It seems we can slightly relax the condition here, but that only affects a single instance in clang binary and no instances in gcc binary. However, the adjustment makes the algorithm quadratic. Thus, keeping it as is. spupyrev: Discussed this example offline. It seems we can slightly relax the condition here, but that…
return false;		return false;
// If any of the unknown blocks is an exit block, skip rebalancing		DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
for (auto Block : UnknownSuccs) {
if (Block->isExit())		// Verify sinks of the subgraph
		for (auto Block : UnknownBlocks) {
		if (Block->SuccJumps.empty()) {
		// If there are multiple (known and unknown) sinks, we can't rebalance
		if (DstBlock != nullptr)
return false;		return false;
		continue;
		hoyUnsubmitted Done Reply Inline Actions return false here if DstBlock != nullptr ? hoy: return false here if DstBlock != nullptr ?
		}
		size_t NumIgnoredJumps = 0;
		for (auto Jump : Block->SuccJumps) {
		if (ignoreJump(SrcBlock, DstBlock, Jump))
		NumIgnoredJumps++;
		}
		// If there is a non-sink block in UnknownBlocks with all jumps ignored,
		hoyUnsubmitted Done Reply Inline Actions The check NumIgnoredJumps > 0 isn't necessary since Block->SuccJumps must not be empty here. hoy: The check NumIgnoredJumps > 0 isn't necessary since Block->SuccJumps must not be empty here.
		// then we can't rebalance
		if (NumIgnoredJumps == Block->SuccJumps.size())
		return false;
		}

		return true;
}		}

		/// Decide whether the Jump is ignored while processing an unknown subgraphs
		/// rooted at basic block SrcBlock with the destination block, DstBlock.
		hoyUnsubmitted Done Reply Inline Actions Please add omment for SrcBlock and DstBlock. hoy: Please add omment for SrcBlock and DstBlock.
		bool ignoreJump(const FlowBlock SrcBlock, const FlowBlock DstBlock,
		const FlowJump *Jump) {
		// Ignore unlikely jumps with zero flow
		if (Jump->IsUnlikely && Jump->Flow == 0)
		return true;

		auto JumpSource = &Func.Blocks[Jump->Source];
		auto JumpTarget = &Func.Blocks[Jump->Target];

		// Do not ignore jumps coming into DstBlock
		if (DstBlock != nullptr && JumpTarget == DstBlock)
		return false;

		// Ignore jumps out of SrcBlock to known blocks
		if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
		return true;

		// Ignore jumps to known blocks with zero flow
		if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
return true;		return true;

		return false;
}		}

/// Verify if the given unknown subgraph is acyclic, and if yes, reorder		/// Verify if the given unknown subgraph is acyclic, and if yes, reorder
/// UnknownSuccs in the topological order (so that all jumps are "forward").		/// UnknownBlocks in the topological order (so that all jumps are "forward").
bool isAcyclicSubgraph(FlowBlock SrcBlock, FlowBlock DstBlock,		bool isAcyclicSubgraph(const FlowBlock SrcBlock, const FlowBlock DstBlock,
std::vector<FlowBlock *> &UnknownSuccs) {		std::vector<FlowBlock *> &UnknownBlocks) {
// Extract local in-degrees in the considered subgraph		// Extract local in-degrees in the considered subgraph
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);		auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
for (auto Jump : SrcBlock->SuccJumps) {		auto fillInDegree = [&](const FlowBlock *Block) {
		hoyUnsubmitted Not Done Reply Inline Actions nit: more compact without using continue: if (!ignoreJump(SrcBlock, DstBlock, Jump)) LocalInDegree[Jump->Target]++; hoy: nit: more compact without using continue: ``` if (!ignoreJump(SrcBlock, DstBlock, Jump))…
LocalInDegree[Jump->Target]++;		for (auto Jump : Block->SuccJumps) {
}		if (ignoreJump(SrcBlock, DstBlock, Jump))
for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {		continue;
for (auto Jump : UnknownSuccs[I]->SuccJumps) {
LocalInDegree[Jump->Target]++;		LocalInDegree[Jump->Target]++;
}		}
		};
		fillInDegree(SrcBlock);
		for (auto Block : UnknownBlocks) {
		fillInDegree(Block);
}		}
// A loop containing SrcBlock		// A loop containing SrcBlock
if (LocalInDegree[SrcBlock->Index] > 0)		if (LocalInDegree[SrcBlock->Index] > 0)
return false;		return false;

std::vector<FlowBlock *> AcyclicOrder;		std::vector<FlowBlock *> AcyclicOrder;
std::queue<uint64_t> Queue;		std::queue<uint64_t> Queue;
Queue.push(SrcBlock->Index);		Queue.push(SrcBlock->Index);
while (!Queue.empty()) {		while (!Queue.empty()) {
auto &Block = Func.Blocks[Queue.front()];		FlowBlock *Block = &Func.Blocks[Queue.front()];
Queue.pop();		Queue.pop();
// Stop propagation once we reach DstBlock		// Stop propagation once we reach DstBlock, if any
if (Block.Index == DstBlock->Index)		if (DstBlock != nullptr && Block == DstBlock)
break;		break;

AcyclicOrder.push_back(&Block);		// Keep an acyclic order of unknown blocks
		if (Block->UnknownWeight && Block != SrcBlock)
		AcyclicOrder.push_back(Block);

// Add to the queue all successors with zero local in-degree		// Add to the queue all successors with zero local in-degree
for (auto Jump : Block.SuccJumps) {		for (auto Jump : Block->SuccJumps) {
		if (ignoreJump(SrcBlock, DstBlock, Jump))
		continue;
uint64_t Dst = Jump->Target;		uint64_t Dst = Jump->Target;
LocalInDegree[Dst]--;		LocalInDegree[Dst]--;
if (LocalInDegree[Dst] == 0) {		if (LocalInDegree[Dst] == 0) {
Queue.push(Dst);		Queue.push(Dst);
}		}
}		}
}		}

// If there is a cycle in the subgraph, AcyclicOrder contains only a subset		// If there is a cycle in the subgraph, AcyclicOrder contains only a subset
// of all blocks		// of all blocks
if (UnknownSuccs.size() + 1 != AcyclicOrder.size())		if (UnknownBlocks.size() != AcyclicOrder.size())
return false;		return false;
UnknownSuccs = AcyclicOrder;		UnknownBlocks = AcyclicOrder;
return true;		return true;
}		}

/// Rebalance a given subgraph.		/// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
void rebalanceUnknownSubgraph(FlowBlock SrcBlock, FlowBlock DstBlock,		/// having UnknownBlocks intermediate blocks.
std::vector<FlowBlock *> &UnknownSuccs) {		void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
		const FlowBlock *DstBlock,
		const std::vector<FlowBlock *> &UnknownBlocks) {
assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");		assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");

for (auto Block : UnknownSuccs) {		// Ditribute flow from the source block
		uint64_t BlockFlow = 0;
		// SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
		for (auto Jump : SrcBlock->SuccJumps) {
		if (ignoreJump(SrcBlock, DstBlock, Jump))
		continue;
		BlockFlow += Jump->Flow;
		}
		rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

		// Ditribute flow from the remaining blocks
		for (auto Block : UnknownBlocks) {
		assert(Block->UnknownWeight && "incorrect unknown subgraph");
		uint64_t BlockFlow = 0;
// Block's flow is the sum of incoming flows		// Block's flow is the sum of incoming flows
uint64_t TotalFlow = 0;
if (Block == SrcBlock) {
TotalFlow = Block->Flow;
} else {
for (auto Jump : Block->PredJumps) {		for (auto Jump : Block->PredJumps) {
		hoyUnsubmitted Not Done Reply Inline Actions Can SrcBlock have unknown weight? It should have been filtered out by `canRebalanceAtRoot`. hoy: Can SrcBlock have unknown weight? It should have been filtered out by `canRebalanceAtRoot`.
		spupyrevAuthorUnsubmitted Done Reply Inline Actions good point. The reason is UnknownBlocks contains SrcBlock in this function. Let me try to reimplement this part or apply some renaming to avoid confusion spupyrev: good point. The reason is UnknownBlocks contains SrcBlock in this function. Let me try to…
TotalFlow += Jump->Flow;		BlockFlow += Jump->Flow;
		}
		Block->Flow = BlockFlow;
		rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
}		}
Block->Flow = TotalFlow;
}		}

		/// Redistribute flow for a block in a subgraph rooted at SrcBlock,
		/// and ending at DstBlock.
		void rebalanceBlock(const FlowBlock SrcBlock, const FlowBlock DstBlock,
		const FlowBlock *Block, uint64_t BlockFlow) {
// Process all successor jumps and update corresponding flow values		// Process all successor jumps and update corresponding flow values
for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {		size_t BlockDegree = 0;
auto Jump = Block->SuccJumps[I];		for (auto Jump : Block->SuccJumps) {
if (I + 1 == Block->SuccJumps.size()) {		if (ignoreJump(SrcBlock, DstBlock, Jump))
Jump->Flow = TotalFlow;
continue;		continue;
		BlockDegree++;
}		}
uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);		// If all successor jumps of the block are ignored, skip it
		if (DstBlock == nullptr && BlockDegree == 0)
		return;
		assert(BlockDegree > 0 && "all outgoing jumps are ignored");

		// Each of the Block's successors gets the following amount of flow.
		// Rounding the value up so that all flow is propagated
		uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
		for (auto Jump : Block->SuccJumps) {
		if (ignoreJump(SrcBlock, DstBlock, Jump))
		continue;
		uint64_t Flow = std::min(SuccFlow, BlockFlow);
Jump->Flow = Flow;		Jump->Flow = Flow;
TotalFlow -= Flow;		BlockFlow -= Flow;
}
}		}
		assert(BlockFlow == 0 && "not all flow is propagated");
}		}

/// A constant indicating an arbitrary exit block of a function.		/// A constant indicating an arbitrary exit block of a function.
static constexpr uint64_t AnyExitBlock = uint64_t(-1);		static constexpr uint64_t AnyExitBlock = uint64_t(-1);

/// The function.		/// The function.
FlowFunction &Func;		FlowFunction &Func;
};		};
▲ Show 20 Lines • Show All 227 Lines • Show Last 20 Lines

llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof

This file was added.

				foo1:37078302:0
				1: 3300
				2: 0
				6: 3300
				!CFGChecksum: 157181141624

				foo2:37078302:0
				1: 128
				2: 128
				3: 128
				4: 128
				!CFGChecksum: 208782362068

				foo3:37078302:0
				1: 500
				2: 1500
				4: 1200
				6: 900
				9: 500
				!CFGChecksum: 189901498683

				foo4:37078302:0
				1: 400
				3: 400
				10: 400
				!CFGChecksum: 241030178952

llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll

This file was added.

				; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof \| opt -analyze -branch-prob -enable-new-pm=0 \| FileCheck %s
				; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof \| opt -analyze -block-freq -enable-new-pm=0 \| FileCheck %s --check-prefix=CHECK2

				; The test verifies that counts can rebalanced in switch statements that contain
				; both 'known' and 'unknown' basic blocks.
				;
				; +---------+
				; +----------------- \| b15 [?] \|
				; \| +---------+
				; \| ^
				; \| \|
				; \| \|
				; \| +---------+ +--------------+ +---------+
				; \| \| b13 [?] \| <-- \| b11 [3300] \| --> \| b14 [?] \|
				; \| +---------+ +--------------+ +---------+
				; \| \| \| \| \|
				; \| \| \| \| \|
				; \| \| v \| \|
				; \| \| +---------+ \| \|
				; \| \| \| b12 [0] \| \| \|
				; \| \| +---------+ \| \|
				; \| \| \| \| \|
				; \| \| \| \| \|
				; \| \| v v \|
				; \| \| +--------------+ \|
				; \| +-----------> \| \| <-----+
				; \| \| b16 [3300] \|
				; +----------------> \| \|
				; +--------------+

				@yydebug = dso_local global i32 0, align 4

				; Function Attrs: nounwind uwtable
				define dso_local i32 @foo1(i32 %0, i32 %1) #0 {
				b11:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
				%cmp = icmp ne i32 %0, 0
				switch i32 %1, label %b12 [
				i32 1, label %b13
				i32 2, label %b14
				i32 3, label %b15
				i32 4, label %b16
				]
				; CHECK: edge b11 -> b12 probability is 0x00000000 / 0x80000000 = 0.00%
				; CHECK: edge b11 -> b13 probability is 0x20000000 / 0x80000000 = 25.00%
				; CHECK: edge b11 -> b14 probability is 0x20000000 / 0x80000000 = 25.00%
				; CHECK: edge b11 -> b15 probability is 0x20000000 / 0x80000000 = 25.00%
				; CHECK: edge b11 -> b16 probability is 0x20000000 / 0x80000000 = 25.00%
				; CHECK2: - b11: float = {{.}}, int = {{.}}, count = 3300

				b12:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
				br label %b16
				; CHECK2: - b12: float = {{.}}, int = {{.}}, count = 0

				b13:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
				br label %b16
				; CHECK2: - b13: float = {{.}}, int = {{.}}, count = 825

				b14:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
				br label %b16
				; CHECK2: - b14: float = {{.}}, int = {{.}}, count = 825

				b15:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
				br label %b16
				; CHECK2: - b15: float = {{.}}, int = {{.}}, count = 825

				b16:
				call void @llvm.pseudoprobe(i64 7682762345278052905, i64 6, i32 0, i64 -1)
				ret i32 %1
				; CHECK2: - b16: float = {{.}}, int = {{.}}, count = 3300
				}


				; The test verifies that counts can rebalanced even when control-flow ends at
				; a basic block with an unknown count.
				;
				; +-----------+
				; \| b21 [128] \| -+
				; +-----------+ \|
				; \| \|
				; v \|
				; +-----------+ \|
				; \| b22 [128] \| \|
				; +-----------+ \|
				; \| \|
				; v \|
				; +-----------+ \|
				; +------------ \| b23 [128] \| <+
				; \| +-----------+
				; \| \|
				; v v
				; +---------+ +-----------+
				; \| b26 [?] \| <-- \| b24 [128] \|
				; +---------+ +-----------+
				; \| \|
				; \| v
				; \| +-----------+
				; \| \| b25 [?] \|
				; \| +-----------+
				; \| \|
				; \| v
				; \| +-----------+
				; +-----------> \| b27 [?] \| -+
				; +-----------+ \|
				; \| \|
				; v \|
				; +-----------+ \|
				; \| b28 [?] \| \|
				; +-----------+ \|
				; \| \|
				; v \|
				; +-----------+ \|
				; \| b29 [?] \| <+
				; +-----------+

				define dso_local i32 @foo2(i32 %0, i32 %1) #0 {
				b21:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
				%cmp = icmp ne i32 %0, 0
				br i1 %cmp, label %b22, label %b23
				; CHECK: edge b21 -> b22 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
				; CHECK: edge b21 -> b23 probability is 0x00000000 / 0x80000000 = 0.00%
				; CHECK2: - b21: float = {{.}}, int = {{.}}, count = 128

				b22:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 0, i64 -1)
				br label %b23

				b23:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 3, i32 0, i64 -1)
				br i1 %cmp, label %b24, label %b26
				; CHECK: edge b23 -> b24 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
				; CHECK: edge b23 -> b26 probability is 0x00000000 / 0x80000000 = 0.00%
				; CHECK2: - b23: float = {{.}}, int = {{.}}, count = 128

				b24:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 4, i32 0, i64 -1)
				br i1 %cmp, label %b25, label %b26
				; CHECK: edge b24 -> b25 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK: edge b24 -> b26 probability is 0x40000000 / 0x80000000 = 50.00%

				b25:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 5, i32 0, i64 -1)
				br label %b27
				; CHECK2: - b25: float = {{.}}, int = {{.}}, count = 64

				b26:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 6, i32 0, i64 -1)
				br label %b27
				; CHECK2: - b26: float = {{.}}, int = {{.}}, count = 64

				b27:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 7, i32 0, i64 -1)
				br i1 %cmp, label %b28, label %b29
				; CHECK: edge b27 -> b28 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK: edge b27 -> b29 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK2: - b27: float = {{.}}, int = {{.}}, count = 128

				b28:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 8, i32 0, i64 -1)
				br label %b29
				; CHECK2: - b28: float = {{.}}, int = {{.}}, count = 64

				b29:
				call void @llvm.pseudoprobe(i64 2494702099028631698, i64 9, i32 0, i64 -1)
				ret i32 %1
				; CHECK2: - b29: float = {{.}}, int = {{.}}, count = 128
				}


				; The test verifies a flexible mode of rebalancing in which some jumps to known
				; basic blocks are ignored.
				;
				; +------------+
				; \| b31 [500] \|
				; +------------+
				; \|
				; v
				; +---------+ +------------+
				; \| b33 [?] \| <-- \| b32 [1500] \| <-----+
				; +---------+ +------------+ \|
				; \| \| \|
				; \| v \|
				; \| +------------+ +-----------+
				; \| \| b34 [1200] \| --> \| b36 [900] \|
				; \| +------------+ +-----------+
				; \| \|
				; \| v
				; \| +------------+
				; \| \| b35 [?] \|
				; \| +------------+
				; \| \|
				; \| v
				; \| +------------+
				; +-----------> \| b37 [?] \| -+
				; +------------+ \|
				; \| \|
				; v \|
				; +------------+ \|
				; \| b38 [?] \| \|
				; +------------+ \|
				; \| \|
				; v \|
				; +------------+ \|
				; \| b39 [500] \| <+
				; +------------+
				;

				define dso_local i32 @foo3(i32 %0, i32 %1) #0 {
				b31:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 1, i32 0, i64 -1)
				%cmp = icmp ne i32 %0, 0
				br label %b32
				; CHECK2: - b31: float = {{.}}, int = {{.}}, count = 500

				b32:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 2, i32 0, i64 -1)
				br i1 %cmp, label %b33, label %b34
				; CHECK: edge b32 -> b33 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK: edge b32 -> b34 probability is 0x66666666 / 0x80000000 = 80.00%
				; CHECK2: - b32: float = {{.}}, int = {{.}}, count = 1500

				b33:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 3, i32 0, i64 -1)
				br label %b37
				; CHECK2: - b33: float = {{.}}, int = {{.}}, count = 300

				b34:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 4, i32 0, i64 -1)
				br i1 %cmp, label %b35, label %b36
				; CHECK: edge b34 -> b35 probability is 0x15555555 / 0x80000000 = 16.67%
				; CHECK: edge b34 -> b36 probability is 0x6aaaaaab / 0x80000000 = 83.33% [HOT edge]
				; CHECK2: - b34: float = {{.}}, int = {{.}}, count = 1200

				b35:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 5, i32 0, i64 -1)
				br label %b37
				; CHECK2: - b35: float = {{.}}, int = {{.}}, count = 200

				b36:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 6, i32 0, i64 -1)
				br label %b32
				; CHECK2: - b36: float = {{.}}, int = {{.}}, count = 1000

				b37:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 7, i32 0, i64 -1)
				br i1 %cmp, label %b38, label %b39
				; CHECK: edge b37 -> b38 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK: edge b37 -> b39 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK2: - b37: float = {{.}}, int = {{.}}, count = 500

				b38:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 8, i32 0, i64 -1)
				br label %b39
				; CHECK2: - b38: float = {{.}}, int = {{.}}, count = 250

				b39:
				call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 9, i32 0, i64 -1)
				ret i32 %1
				; CHECK2: - b39: float = {{.}}, int = {{.}}, count = 500
				}


				; The test verifies that flow rebalancer can ignore 'unlikely' jumps.
				;
				; +-----------+
				; \| b41 [400] \| -+
				; +-----------+ \|
				; \| \|
				; \| \|
				; v \|
				; +-----------+ \|
				; \| b42 [?] \| \|
				; +-----------+ \|
				; \| \|
				; \| \|
				; v v
				; +---------++---------+ +---------------------------+ +---------++---------+
				; \| b48 [?] \|\| b46 [?] \| <-- \| \| --> \| b47 [?] \|\| b49 [?] \|
				; +---------++---------+ \| \| +---------++---------+
				; \| ^ \| \| \| \| ^
				; \| \| \| \| b43 [400] \| \| \|
				; \| +-------+-------------\| \| \| \|
				; \| \| \| \| \| \|
				; \| \| \| \| ------+----------+
				; \| \| +---------------------------+ \|
				; \| \| \| \| \|
				; \| \| \| \| \|
				; \| \| v v \|
				; \| \| +-----------+ +---------+ \|
				; \| \| \| b44 [?] \| \| b45 [?] \| \|
				; \| \| +-----------+ +---------+ \|
				; \| \| \| \| \|
				; \| \| \| \| \|
				; \| \| v v \|
				; \| \| +---------------------------+ \|
				; \| +-----------> \| \| <-----+
				; \| \| b410 [400] \|
				; \| \| \|
				; +----------------------> \| \|
				; +---------------------------+


				define dso_local void @foo4(i32 %0, i32 %1) #0 {
				b41:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 1, i32 0, i64 -1)
				%cmp = icmp ne i32 %0, 0
				br i1 %cmp, label %b42, label %b43
				; CHECK: edge b41 -> b42 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK: edge b41 -> b43 probability is 0x40000000 / 0x80000000 = 50.00%
				; CHECK2: - b41: float = {{.}}, int = {{.}}, count = 400

				b42:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 2, i32 0, i64 -1)
				br label %b43
				; CHECK2: - b42: float = {{.}}, int = {{.}}, count = 200

				b43:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 3, i32 0, i64 -1)
				switch i32 %1, label %b49 [
				i32 1, label %b44
				i32 2, label %b45
				i32 3, label %b46
				i32 4, label %b47
				i32 5, label %b48
				]
				; CHECK: edge b43 -> b49 probability is 0x00000000 / 0x80000000 = 0.00%
				; CHECK: edge b43 -> b44 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK: edge b43 -> b45 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK: edge b43 -> b46 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK: edge b43 -> b47 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK: edge b43 -> b48 probability is 0x1999999a / 0x80000000 = 20.00%
				; CHECK2: - b43: float = {{.}}, int = {{.}}, count = 400

				b44:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 4, i32 0, i64 -1)
				br label %b410
				; CHECK2: - b44: float = {{.}}, int = {{.}}, count = 80

				b45:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 5, i32 0, i64 -1)
				br label %b410
				; CHECK2: - b45: float = {{.}}, int = {{.}}, count = 80

				b46:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 6, i32 0, i64 -1)
				br label %b410
				; CHECK2: - b46: float = {{.}}, int = {{.}}, count = 80

				b47:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 7, i32 0, i64 -1)
				br label %b410
				; CHECK2: - b47: float = {{.}}, int = {{.}}, count = 80

				b48:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 8, i32 0, i64 -1)
				br label %b410
				; CHECK2: - b48: float = {{.}}, int = {{.}}, count = 80

				b49:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 9, i32 0, i64 -1)
				unreachable
				; CHECK2: - b49: float = {{.}}, int = {{.}}, count = 0

				b410:
				call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 10, i32 0, i64 -1)
				ret void
				; CHECK2: - b410: float = {{.}}, int = {{.}}, count = 400
				}


				; Function Attrs: inaccessiblememonly nounwind willreturn
				declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4

				attributes #0 = { noinline nounwind uwtable "use-sample-profile" }
				attributes #4 = { inaccessiblememonly nounwind willreturn }

				!llvm.pseudo_probe_desc = !{!7, !8, !9, !10}

				!7 = !{i64 7682762345278052905, i64 157181141624, !"foo1", null}
				!8 = !{i64 2494702099028631698, i64 208782362068, !"foo2", null}
				!9 = !{i64 -7908226060800700466, i64 189901498683, !"foo3", null}
				!10 = !{i64 -6882312132165544686, i64 241030178952, !"foo4", null}

llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll

	; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof \| opt -analyze -branch-prob -enable-new-pm=0 \| FileCheck %s			; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof \| opt -analyze -branch-prob -enable-new-pm=0 \| FileCheck %s
	; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof \| opt -analyze -block-freq -enable-new-pm=0 \| FileCheck %s --check-prefix=CHECK2			; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof \| opt -analyze -block-freq -enable-new-pm=0 \| FileCheck %s --check-prefix=CHECK2

	; The test contains a "dimanond" and a "triangle" that needs to be rebalanced			; The test contains a "diamond" and a "triangle" that needs to be rebalanced
	; after basic profile inference.			; after basic profile inference.
	;			;
	; +----------------+			; +----------------+
	; \| b11 [?] \|			; \| b11 [?] \|
	; +----------------+			; +----------------+
	; \|			; \|
	; v			; v
	; +----------+ +----------------+			; +----------+ +----------------+
	▲ Show 20 Lines • Show All 278 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

a better profi rebalancer
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 400917

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof

llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll

llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll

This is an archive of the discontinued LLVM Phabricator instance.

a better profi rebalancerClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 400917

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof

llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll

llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll

a better profi rebalancer
ClosedPublic