Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp =================================================================== --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -332,6 +332,8 @@ static constexpr unsigned ScoreForSuccBlock = 1; static constexpr unsigned ScoreForSinkBlock = 1; + using PHIFrontierMap = SmallDenseMap; + OutliningRegion(const OutliningRegion &) = delete; OutliningRegion &operator=(const OutliningRegion &) = delete; @@ -344,6 +346,20 @@ const PostDomTree &PDT) { OutliningRegion ColdRegion; + // Map PHIs outside of the outlining region to a unique incoming value + // originating from the outlining region. Don't update the PHI frontier for + // sink-predecessor blocks, because their successors are post-dominated by + // the sink block. + PHIFrontierMap PHIFrontier; + + SmallPtrSet RegionBlocks; + + auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) { + RegionBlocks.insert(BB); + ColdRegion.Blocks.emplace_back(BB, Score); + assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB"); + }; + // The ancestor farthest-away from SinkBB, and also post-dominated by it. unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock); ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr; @@ -379,13 +395,14 @@ BestScore = PredScore; } - ColdRegion.Blocks.emplace_back(&PredBB, PredScore); + addBlockToRegion(&PredBB, PredScore); ++PredIt; } // Add SinkBB to the cold region. It's considered as an entry point before // any sink-successor blocks. - ColdRegion.Blocks.emplace_back(&SinkBB, SinkScore); + addBlockToRegion(&SinkBB, SinkScore); + updatePHIFrontier(SinkBB, RegionBlocks, PHIFrontier); // Find all successors of SinkBB dominated by SinkBB using DFS. auto SuccIt = ++df_begin(&SinkBB); @@ -394,9 +411,13 @@ BasicBlock &SuccBB = **SuccIt; bool SinkDom = DT.dominates(&SinkBB, &SuccBB); + // Don't allow the backwards & forwards DFSes to mark the same block. + bool DuplicateBlock = RegionBlocks.count(&SuccBB); + // If SinkBB does not dominate a successor, do not mark the successor (or // any of its successors) cold. - if (!SinkDom || !mayExtractBlock(SuccBB)) { + if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB) || + !updatePHIFrontier(SuccBB, RegionBlocks, PHIFrontier)) { SuccIt.skipChildren(); continue; } @@ -407,7 +428,7 @@ BestScore = SuccScore; } - ColdRegion.Blocks.emplace_back(&SuccBB, SuccScore); + addBlockToRegion(&SuccBB, SuccScore); ++SuccIt; } @@ -454,6 +475,34 @@ return SubRegion; } + +private: + /// Update the PHI frontier by recording \p BB's outgoing values. If \p BB + /// can be added to the outlining region, return true. + static bool + updatePHIFrontier(const BasicBlock &BB, + const SmallPtrSetImpl &RegionBlocks, + PHIFrontierMap &PHIFrontier) { + // Make sure that each successor of BB in the frontier has a unique + // incoming value originating from the outlining region. + for (const BasicBlock *SuccBB : successors(&BB)) { + // Ignore self-edges and successors which aren't in the frontier. + if (SuccBB == &BB || RegionBlocks.count(SuccBB)) + continue; + + for (const PHINode &SuccPhi : SuccBB->phis()) { + // Record the incoming value from BB. Flag it if it isn't unique. + Value *&RecordedIncomingVal = PHIFrontier[&SuccPhi]; + Value *ActualIncomingVal = SuccPhi.getIncomingValueForBlock(&BB); + if (RecordedIncomingVal) + if (ActualIncomingVal != RecordedIncomingVal) + return false; + + RecordedIncomingVal = ActualIncomingVal; + } + } + return true; + } }; bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI, Index: llvm/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll @@ -0,0 +1,29 @@ +; RUN: opt -hotcoldsplit -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: define {{.*}}@fun +; CHECK: call {{.*}}@fun.cold.1( +define void @fun() { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + ; This will be marked by the inverse DFS on sink-predecesors. + br label %sink + +sink: + call void @sink() + + ; Do not allow the forward-DFS on sink-successors to mark the block again. + br i1 undef, label %if.then, label %if.then.exit + +if.then.exit: + ret void + +if.else: + ret void +} + +declare void @sink() cold Index: llvm/test/Transforms/HotColdSplit/outline-while-loop.ll =================================================================== --- llvm/test/Transforms/HotColdSplit/outline-while-loop.ll +++ llvm/test/Transforms/HotColdSplit/outline-while-loop.ll @@ -55,6 +55,47 @@ ret void } +; This is the same as @foo, but the while loop comes after the sink block. +; CHECK-LABEL: define {{.*}}@while_loop_after_sink( +; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl +; CHECK-LABEL: codeRepl: +; CHECK-NEXT: call void @while_loop_after_sink.cold.1 +; CHECK-LABEL: if.end: +; CHECK: call void @sideeffect(i32 1) +define void @while_loop_after_sink(i32 %cond) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end, label %sink + +sink: + tail call void (...) @sink() + br label %while.cond.preheader + +while.cond.preheader: + %cmp3 = icmp sgt i32 %cond, 10 + br i1 %cmp3, label %while.body.preheader, label %while.end + +while.body.preheader: ; preds = %while.cond.preheader + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ] + %dec = add nsw i32 %cond.addr.04, -1 + tail call void @sideeffect(i32 0) #3 + %cmp = icmp sgt i32 %dec, 10 + br i1 %cmp, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %while.cond.preheader + ret void + +if.end: ; preds = %entry + tail call void @sideeffect(i32 1) + ret void +} + ; CHECK-LABEL: define {{.*}}@foo.cold.1 ; CHECK: phi i32 ; CHECK-NEXT: add nsw i32 @@ -62,6 +103,14 @@ ; CHECK-NEXT: icmp ; CHECK-NEXT: br +; CHECK-LABEL: define {{.*}}@while_loop_after_sink.cold.1 +; CHECK: call {{.*}}@sink +; CHECK: phi i32 +; CHECK-NEXT: add nsw i32 +; CHECK-NEXT: call {{.*}}@sideeffect +; CHECK-NEXT: icmp +; CHECK-NEXT: br + declare void @sideeffect(i32) declare void @sink(...) cold Index: llvm/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll @@ -0,0 +1,33 @@ +; RUN: opt -S -hotcoldsplit < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: define {{.*}}@foo( +; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ], [ 3, %coldbb2 ] + +; CHECK-LABEL: define {{.*}}@foo.cold.1( +; CHECK: call {{.*}}@sink + +define void @foo(i32 %cond) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end, label %coldbb + +coldbb: + call void @sink() + call void @sideeffect() + call void @sideeffect() + br i1 undef, label %if.end, label %coldbb2 + +coldbb2: + br label %if.end + +if.end: + %p = phi i32 [0, %entry], [1, %coldbb], [3, %coldbb2] + ret void +} + +declare void @sink() cold + +declare void @sideeffect() Index: llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -hotcoldsplit < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: define {{.*}}@exit_block_with_same_incoming_vals +; CHECK: call {{.*}}@exit_block_with_same_incoming_vals.cold.1( +; CHECK-NOT: br i1 undef +; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ] +define void @exit_block_with_same_incoming_vals(i32 %cond) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end, label %coldbb + +coldbb: + call void @sink() + call void @sideeffect() + call void @sideeffect() + br i1 undef, label %if.end, label %coldbb2 + +coldbb2: + %p2 = phi i32 [0, %coldbb], [1, %coldbb2] + br i1 undef, label %if.end, label %coldbb2 + +if.end: + %p = phi i32 [0, %entry], [1, %coldbb], [1, %coldbb2] + ret void +} + +; CHECK-LABEL: define {{.*}}@exit_block_with_distinct_incoming_vals +; CHECK: call {{.*}}@exit_block_with_distinct_incoming_vals.cold.1( +; CHECK: br i1 undef +; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ], [ 2, %coldbb2 ] +define void @exit_block_with_distinct_incoming_vals(i32 %cond) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end, label %coldbb + +coldbb: + call void @sink() + call void @sideeffect() + call void @sideeffect() + br i1 undef, label %if.end, label %coldbb2 + +coldbb2: + %p2 = phi i32 [0, %coldbb], [1, %coldbb2] + br i1 undef, label %if.end, label %coldbb2 + +if.end: + %p = phi i32 [0, %entry], [1, %coldbb], [2, %coldbb2] + ret void +} + +declare void @sink() cold + +declare void @sideeffect()