diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -271,6 +271,177 @@ uint64_t Target; }; +/// Post-processing adjustment of the control flow. +class FlowAdjuster { +public: + FlowAdjuster(FlowFunction &Func) : Func(Func) { + assert(Func.Blocks[Func.Entry].isEntry() && + "incorrect index of the entry block"); + } + + // Run the post-processing + void run() { + /// We adjust the control flow in a function so as to remove all + /// "isolated" components with positive flow that are unreachable + /// from the entry block. For every such component, we find the shortest + /// path from the entry to an exit passing through the component, and + /// increase the flow by one unit along the path. + joinIsolatedComponents(); + } + +private: + void joinIsolatedComponents() { + // Find blocks that are reachable from the source + auto Visited = std::vector(NumBlocks(), false); + findReachable(Func.Entry, Visited); + + // Iterate over all non-reachable blocks and adjust their weights + for (uint64_t I = 0; I < NumBlocks(); I++) { + auto &Block = Func.Blocks[I]; + if (Block.Flow > 0 && !Visited[I]) { + // Find a path from the entry to an exit passing through the block I + auto Path = findShortestPath(I); + // Increase the flow along the path + assert(Path.size() > 0 && Path[0]->Source == Func.Entry && + "incorrectly computed path adjusting control flow"); + Func.Blocks[Func.Entry].Flow += 1; + for (auto &Jump : Path) { + Jump->Flow += 1; + Func.Blocks[Jump->Target].Flow += 1; + // Update reachability + findReachable(Jump->Target, Visited); + } + } + } + } + + /// Run bfs from a given block along the jumps with a positive flow and mark + /// all reachable blocks. + void findReachable(uint64_t Src, std::vector &Visited) { + if (Visited[Src]) + return; + std::queue Queue; + Queue.push(Src); + Visited[Src] = true; + while (!Queue.empty()) { + Src = Queue.front(); + Queue.pop(); + for (auto Jump : Func.Blocks[Src].SuccJumps) { + uint64_t Dst = Jump->Target; + if (Jump->Flow > 0 && !Visited[Dst]) { + Queue.push(Dst); + Visited[Dst] = true; + } + } + } + } + + /// Find the shortest path from the entry block to an exit block passing + /// through a given block. + std::vector findShortestPath(uint64_t BlockIdx) { + // A path from the entry block to BlockIdx + auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); + // A path from BlockIdx to an exit block + auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); + + // Concatenate the two paths + std::vector Result; + Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); + Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); + return Result; + } + + /// Apply the Dijkstra algorithm to find the shortest path from a given + /// Source to a given Target block. + /// If Target == -1, then the path ends at an exit block. + std::vector findShortestPath(uint64_t Source, uint64_t Target) { + // Quit early, if possible + if (Source == Target) + return std::vector(); + if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) + return std::vector(); + + // Initialize data structures + auto Distance = std::vector(NumBlocks(), INF); + auto Parent = std::vector(NumBlocks(), nullptr); + Distance[Source] = 0; + std::set> Queue; + Queue.insert(std::make_pair(Distance[Source], Source)); + + // Run the Dijkstra algorithm + while (!Queue.empty()) { + uint64_t Src = Queue.begin()->second; + Queue.erase(Queue.begin()); + // If we found a solution, quit early + if (Src == Target || + (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) + break; + + for (auto Jump : Func.Blocks[Src].SuccJumps) { + uint64_t Dst = Jump->Target; + int64_t JumpDist = jumpDistance(Jump); + if (Distance[Dst] > Distance[Src] + JumpDist) { + Queue.erase(std::make_pair(Distance[Dst], Dst)); + + Distance[Dst] = Distance[Src] + JumpDist; + Parent[Dst] = Jump; + + Queue.insert(std::make_pair(Distance[Dst], Dst)); + } + } + } + // If Target is not provided, find the closest exit block + if (Target == AnyExitBlock) { + for (uint64_t I = 0; I < NumBlocks(); I++) { + if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { + if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { + Target = I; + } + } + } + } + assert(Parent[Target] != nullptr && "a path does not exist"); + + // Extract the constructed path + std::vector Result; + uint64_t Now = Target; + while (Now != Source) { + assert(Now == Parent[Now]->Target && "incorrect parent jump"); + Result.push_back(Parent[Now]); + Now = Parent[Now]->Source; + } + // Reverse the path, since it is extracted from Target to Source + std::reverse(Result.begin(), Result.end()); + return Result; + } + + /// A distance of a path for a given jump. + /// In order to incite the path to use blocks/jumps with large positive flow, + /// and avoid changing branch probability of outgoing edges drastically, + /// set the distance as follows: + /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) + /// if Block.Weight > 0, then distance = 1 + /// otherwise distance >> 1 + int64_t jumpDistance(FlowJump *Jump) const { + int64_t BaseDistance = 100; + if (Jump->IsUnlikely) + return MinCostMaxFlow::AuxCostUnlikely; + if (Jump->Flow > 0) + return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); + if (Func.Blocks[Jump->Target].Weight > 0) + return BaseDistance; + return BaseDistance * (NumBlocks() + 1); + }; + + uint64_t NumBlocks() const { return Func.Blocks.size(); } + + /// A constant indicating an arbitrary exit block of a function. + static constexpr uint64_t AnyExitBlock = uint64_t(-1); + + /// The function. + FlowFunction &Func; +}; + /// Initializing flow network for a given function. /// /// Every block is split into three nodes that are responsible for (i) an @@ -440,6 +611,39 @@ } } assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); + + // Verify that there are no isolated flow components + // One could modify FlowFunction to hold edges indexed by the sources, which + // will avoid a creation of the object + auto PositiveFlowEdges = std::vector>(NumBlocks); + for (auto &Jump : Func.Jumps) { + if (Jump.Flow > 0) { + PositiveFlowEdges[Jump.Source].push_back(Jump.Target); + } + } + + // Run bfs from the source along edges with positive flow + std::queue Queue; + auto Visited = std::vector(NumBlocks, false); + Queue.push(Func.Entry); + Visited[Func.Entry] = true; + while (!Queue.empty()) { + uint64_t Src = Queue.front(); + Queue.pop(); + for (uint64_t Dst : PositiveFlowEdges[Src]) { + if (!Visited[Dst]) { + Queue.push(Dst); + Visited[Dst] = true; + } + } + } + + // Verify that every block that has a positive flow is reached from the source + // along edges with a positive flow + for (uint64_t I = 0; I < NumBlocks; I++) { + auto &Block = Func.Blocks[I]; + assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); + } } #endif @@ -455,6 +659,10 @@ // Extract flow values for every block and every edge extractWeights(InferenceNetwork, Func); + // Post-processing adjustments to the flow + auto Adjuster = FlowAdjuster(Func); + Adjuster.run(); + #ifndef NDEBUG // Verify the result verifyWeights(Func); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-islands.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-islands.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-islands.prof @@ -0,0 +1,27 @@ +islands_1:10822:0 + 1: 1 + 2: 100 + 3: 100 + 4: 1 + 5: 0 + 6: 1 + 7: 0 + !CFGChecksum: 120879332589 + +islands_2:108:0 + 1: 0 + 2: 10000 + 3: 10000 + 4: 0 + !CFGChecksum: 69495280403 + +islands_3:108:0 + 1: 10 + 2: 0 + 3: 10 + 4: 0 + 5: 1000 + 6: 1000 + 7: 0 + 8: 10 + !CFGChecksum: 156608410269 diff --git a/llvm/test/Transforms/SampleProfile/profile-inference-islands.ll b/llvm/test/Transforms/SampleProfile/profile-inference-islands.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-inference-islands.ll @@ -0,0 +1,212 @@ +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-islands.prof -S -o %t +; RUN: FileCheck %s < %t -check-prefix=CHECK-ENTRY-COUNT +; RUN: opt < %t -analyze -block-freq -enable-new-pm=0 | FileCheck %s + + +; The test contains an isolated flow component ("island") that needs to be +; reconnected to the entry point via edges with a positive flow. +; The corresponding CFG is shown below: +; +; +--------+ +--------+ +----------+ +; | b6 [1] | <-- | b4 [1] | <-- | b1 [1] | +; +--------+ +--------+ +----------+ +; | | +; | | +; v v +; +--------+ +----------+ +; | b5 [0] | | b2 [100] | <+ +; +--------+ +----------+ | +; | | +; | | +; v | +; +----------+ | +; | b3 [100] | -+ +; +----------+ +; | +; | +; v +; +----------+ +; | b7 [0] | +; +----------+ + + +; Function Attrs: nounwind uwtable +define dso_local i32 @islands_1(i32 %0, i32 %1) #0 { +b1: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + br i1 %cmp, label %b2, label %b4 +; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 2 + +b2: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 2, i32 0, i64 -1) + br label %b3 +; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 101 + +b3: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 3, i32 0, i64 -1) + br i1 %cmp, label %b2, label %b7 +; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 101 + +b4: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 4, i32 0, i64 -1) + br i1 %cmp, label %b5, label %b6 +; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 1 + +b5: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 5, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 0 + +b6: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 6, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1 + +b7: + call void @llvm.pseudoprobe(i64 -5646793257986063976, i64 7, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 1 + +} + +; Another test with an island. +; +; +----------+ +; | b1 [0] | +; +----------+ +; | +; | +; v +; +----------+ +; | b2 [100] | <+ +; +----------+ | +; | | +; | | +; v | +; +----------+ | +; | b3 [100] | -+ +; +----------+ +; | +; | +; v +; +----------+ +; | b4 [0] | +; +----------+ + +; Function Attrs: nounwind uwtable +define dso_local i32 @islands_2(i32 %0, i32 %1) #1 { +b1: + call void @llvm.pseudoprobe(i64 -7683376842751444845, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + br label %b2 +; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1 + +b2: + call void @llvm.pseudoprobe(i64 -7683376842751444845, i64 2, i32 0, i64 -1) + br label %b3 +; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 10001 + +b3: + call void @llvm.pseudoprobe(i64 -7683376842751444845, i64 3, i32 0, i64 -1) + br i1 %cmp, label %b2, label %b4 +; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 10001 + +b4: + call void @llvm.pseudoprobe(i64 -7683376842751444845, i64 4, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 1 +} + + +; The test verifies that the island is connected to the entry block via a +; cheapest path (that is, passing through blocks with large counts). +; +; +---------+ +---------+ +----------+ +--------+ +; | b8 [10] | <-- | b3 [10] | <-- | b1 [10] | --> | b4 [0] | +; +---------+ +---------+ +----------+ +--------+ +; | | | +; | | | +; | v | +; | +----------+ | +; | | b2 [0] | | +; | +----------+ | +; | | | +; | | | +; | v v +; | +-------------------------+ +; +-----------> | b5 [100] | +; +-------------------------+ +; | ^ +; | | +; v | +; +----------+ | +; | b6 [100] | -+ +; +----------+ +; | +; | +; v +; +----------+ +; | b7 [0] | +; +----------+ + +; Function Attrs: nounwind uwtable +define dso_local i32 @islands_3(i32 %0, i32 %1) #1 { +b1: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + switch i32 %1, label %b2 [ + i32 1, label %b3 + i32 2, label %b4 + ] +; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 11 + +b2: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 2, i32 0, i64 -1) + br label %b5 +; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 0 + +b3: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 3, i32 0, i64 -1) + br i1 %cmp, label %b8, label %b5 +; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 11 + +b4: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 4, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 0 + +b5: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 5, i32 0, i64 -1) + br label %b6 +; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 1001 + +b6: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 6, i32 0, i64 -1) + br i1 %cmp, label %b7, label %b5 +; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1001 + +b7: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 7, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 1 + +b8: + call void @llvm.pseudoprobe(i64 -9095645063288297061, i64 8, i32 0, i64 -1) + ret i32 %1 +; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 10 +} + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #2 + +attributes #0 = { noinline nounwind uwtable "use-sample-profile"} +attributes #1 = { noinline nounwind uwtable "use-sample-profile"} +attributes #2 = { nounwind } + +!llvm.pseudo_probe_desc = !{!7, !8} + +!7 = !{i64 -5646793257986063976, i64 120879332589, !"islands_1"} +!8 = !{i64 -7683376842751444845, i64 69495280403, !"islands_2"} +!9 = !{i64 -9095645063288297061, i64 156608410269, !"islands_3"} + +; CHECK-ENTRY-COUNT: = !{!"function_entry_count", i64 2}