Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp
===================================================================
--- llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -343,6 +343,16 @@
   static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
                                 const PostDomTree &PDT) {
     OutliningRegion ColdRegion;
+    // Blocks already added to ColdRegion, kept as a set for membership checks.
+    SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+
+    // Append BB and its score to the cold region and record BB in ColdBlocks.
+    // A block must not be added to the region twice.
+    auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
+      ColdBlocks.insert(BB);
+      ColdRegion.Blocks.emplace_back(BB, Score);
+      assert(ColdBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BBs");
+    };
 
     // The ancestor farthest-away from SinkBB, and also post-dominated by it.
     unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
@@ -379,13 +389,54 @@
         BestScore = PredScore;
       }
 
-      ColdRegion.Blocks.emplace_back(&PredBB, PredScore);
+      addBlockToRegion(&PredBB, PredScore);
       ++PredIt;
     }
 
+    // Map PHIs outside of the outlining region to incoming values originating
+    // from the outlining region. These incoming values must be unique.
+    SmallDenseMap<PHINode *, Value *> PHIFrontier;
+
+    // Update the PHI frontier by recording \p BB's outgoing values. If \p BB
+    // can be added to the outlining region, return true. Don't update the PHI
+    // frontier for sink-predecessor blocks, because their successors are post-
+    // dominated by the sink block.
+    auto updatePHIFrontier = [&](BasicBlock &BB) -> bool {
+      // Make sure that each successor of BB in the frontier has a unique
+      // incoming value originating from the outlining region.
+      for (BasicBlock *SuccBB : successors(&BB)) {
+        // Ignore self-edges and successors which aren't in the frontier.
+        if (SuccBB == &BB || ColdBlocks.count(SuccBB))
+          continue;
+
+        for (PHINode &SuccPhi : SuccBB->phis()) {
+          // Ignore successor PHIs without an incoming value from BB.
+          int BBIndex = SuccPhi.getBasicBlockIndex(&BB);
+          if (BBIndex == -1)
+            continue;
+
+          // Record the incoming value from BB. Flag it if it isn't unique.
+          // NOTE(review): values recorded earlier in this call stay in the
+          // frontier even when BB is rejected below. That is conservative,
+          // but it can cause later blocks to be rejected unnecessarily.
+          Value *&RecordedIncomingVal = PHIFrontier[&SuccPhi];
+          Value *ActualIncomingVal = SuccPhi.getIncomingValue(BBIndex);
+          if (RecordedIncomingVal)
+            if (ActualIncomingVal != RecordedIncomingVal)
+              return false;
+
+          RecordedIncomingVal = ActualIncomingVal;
+        }
+      }
+      return true;
+    };
+
     // Add SinkBB to the cold region. It's considered as an entry point before
     // any sink-successor blocks.
-    ColdRegion.Blocks.emplace_back(&SinkBB, SinkScore);
+    addBlockToRegion(&SinkBB, SinkScore);
+    // Seed the PHI frontier with SinkBB's outgoing values. The result is
+    // ignored: SinkBB has already been added to the region.
+    updatePHIFrontier(SinkBB);
 
     // Find all successors of SinkBB dominated by SinkBB using DFS.
     auto SuccIt = ++df_begin(&SinkBB);
@@ -396,7 +447,7 @@
 
       // If SinkBB does not dominate a successor, do not mark the successor (or
       // any of its successors) cold.
-      if (!SinkDom || !mayExtractBlock(SuccBB)) {
+      if (!SinkDom || !mayExtractBlock(SuccBB) || !updatePHIFrontier(SuccBB)) {
         SuccIt.skipChildren();
         continue;
       }
@@ -407,7 +458,7 @@
         BestScore = SuccScore;
       }
 
-      ColdRegion.Blocks.emplace_back(&SuccBB, SuccScore);
+      addBlockToRegion(&SuccBB, SuccScore);
       ++SuccIt;
     }
 
Index: llvm/test/Transforms/HotColdSplit/outline-while-loop.ll
===================================================================
--- llvm/test/Transforms/HotColdSplit/outline-while-loop.ll
+++ llvm/test/Transforms/HotColdSplit/outline-while-loop.ll
@@ -55,6 +55,47 @@
   ret void
 }
 
+; This is the same as @foo, but the while loop comes after the sink block.
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink(
+; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @while_loop_after_sink.cold.1
+; CHECK-LABEL: if.end:
+; CHECK: call void @sideeffect(i32 1)
+define void @while_loop_after_sink(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %sink
+
+sink:
+  tail call void (...) @sink()
+  br label %while.cond.preheader
+
+while.cond.preheader:
+  %cmp3 = icmp sgt i32 %cond, 10
+  br i1 %cmp3, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %while.cond.preheader
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ]
+  %dec = add nsw i32 %cond.addr.04, -1
+  tail call void @sideeffect(i32 0) #3
+  %cmp = icmp sgt i32 %dec, 10
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %while.cond.preheader
+  ret void
+
+if.end:                                           ; preds = %entry
+  tail call void @sideeffect(i32 1)
+  ret void
+}
+
 ; CHECK-LABEL: define {{.*}}@foo.cold.1
 ; CHECK: phi i32
 ; CHECK-NEXT: add nsw i32
@@ -62,6 +103,14 @@
 ; CHECK-NEXT: icmp
 ; CHECK-NEXT: br
 
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK: phi i32
+; CHECK-NEXT: add nsw i32
+; CHECK-NEXT: call {{.*}}@sideeffect
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+
 declare void @sideeffect(i32)
 
 declare void @sink(...) cold
Index: llvm/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ], [ 3, %coldbb2 ]
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: call {{.*}}@sink
+
+define void @foo(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  br label %if.end
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [3, %coldbb2]
+  ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()
Index: llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_same_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_same_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ]
+define void @exit_block_with_same_incoming_vals(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+  br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [1, %coldbb2]
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_distinct_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_distinct_incoming_vals.cold.1(
+; CHECK: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ 1, %codeRepl ], [ 2, %coldbb2 ]
+define void @exit_block_with_distinct_incoming_vals(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+  br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [2, %coldbb2]
+  ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()