diff --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp --- a/llvm/lib/CodeGen/CallBrPrepare.cpp +++ b/llvm/lib/CodeGen/CallBrPrepare.cpp @@ -22,12 +22,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -36,31 +42,105 @@ namespace { class CallBrPrepare : public FunctionPass { + bool SplitCriticalEdges(ArrayRef CBRs, DominatorTree &DT); + public: CallBrPrepare() : FunctionPass(ID) {} - static char ID; void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &Fn) override; + static char ID; }; } // end anonymous namespace char CallBrPrepare::ID = 0; -INITIALIZE_PASS(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false) +INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false) FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); } void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); + AU.addPreserved(); } -bool CallBrPrepare::runOnFunction(Function &Fn) { +static bool ShouldRun(const CallBrInst *CBR) { + return CBR && !CBR->getType()->isVoidTy() && !CBR->use_empty(); +} + +static SmallVector FindCallBrs(Function &Fn) { + SmallVector CBRs; for (BasicBlock &BB : Fn) { auto *CBR = dyn_cast(BB.getTerminator()); - if (!CBR) - continue; - // TODO: something interesting. - // https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8 + if (ShouldRun(CBR)) + CBRs.push_back(CBR); } - return false; + return CBRs; +} + +bool CallBrPrepare::SplitCriticalEdges(ArrayRef CBRs, + DominatorTree &DT) { + bool Changed = false; + CriticalEdgeSplittingOptions Options(&DT); + Options.setMergeIdenticalEdges(); + + for (CallBrInst *CBR : CBRs) { + SmallPtrSet Visited; + SmallVector UniqueCriticalSuccessorIndices; + + // Do one pass to find the indices of unique indirect successors BEFORE + // attempting to split critical edges (otherwise the BasicBlock inserted + // in the Visited Set below will not catch duplicates; it would get + // updated to the new synthetic block while iterating this loop). Start + // at index 1 to avoid the default destination. + for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) { + // The same destination may appear multiple times in a callbr + // indirect label list. Also, a phi may list the same predecessor + // multiple times as long as they use the same value. + // + // The CriticalEdgeSplittingOptions option MergeIdenticalEdges will + // handle this properly for us, but we don't want to call + // SplitCriticalEdge on such repeated destinations more than once. + if (!Visited.insert(CBR->getSuccessor(i)).second) + continue; + if (isCriticalEdge(CBR, i)) + UniqueCriticalSuccessorIndices.push_back(i); + } + for (unsigned i : UniqueCriticalSuccessorIndices) { + BasicBlock *Synth = SplitKnownCriticalEdge(CBR, i, Options); + assert(Synth && "Failed to split a known critical edge from callbr"); + (void)Synth; + Changed = true; + } + } + return Changed; +} + +bool CallBrPrepare::runOnFunction(Function &Fn) { + bool Changed = false; + SmallVector CBRs = FindCallBrs(Fn); + + if (CBRs.empty()) + return Changed; + + // It's highly likely that most programs do not contain CallBrInsts. Follow a + // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous + // domtree analysis if available, otherwise compute it lazily. This avoids + // forcing Dominator Tree Construction at -O0 for programs that likely do not + // contain CallBrInsts. It does pessimize programs with callbr at higher + // optimization levels, as the DominatorTree created here is not reused by + // subsequent passes. + DominatorTree *DT; + std::optional LazilyComputedDomTree; + if (auto *DTWP = getAnalysisIfAvailable()) + DT = &DTWP->getDomTree(); + else { + LazilyComputedDomTree.emplace(Fn); + DT = &*LazilyComputedDomTree; + } + + if (SplitCriticalEdges(CBRs, *DT)) + Changed = true; + + return Changed; } diff --git a/llvm/test/CodeGen/AArch64/callbr-prepare.ll b/llvm/test/CodeGen/AArch64/callbr-prepare.ll --- a/llvm/test/CodeGen/AArch64/callbr-prepare.ll +++ b/llvm/test/CodeGen/AArch64/callbr-prepare.ll @@ -1,19 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -callbrprepare -S -o - | FileCheck %s -; TODO: update this test to split critical edges. define i32 @test0() { ; CHECK-LABEL: @test0( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUT:%.*]] = callbr i32 asm "# $0", "=r,!i"() -; CHECK-NEXT: to label [[DIRECT:%.*]] [label %indirect] +; CHECK-NEXT: to label [[DIRECT:%.*]] [label %entry.indirect_crit_edge] +; CHECK: entry.indirect_crit_edge: +; CHECK-NEXT: br label [[INDIRECT:%.*]] ; CHECK: direct: ; CHECK-NEXT: [[OUT2:%.*]] = callbr i32 asm "# $0", "=r,!i"() -; CHECK-NEXT: to label [[DIRECT2:%.*]] [label %indirect] +; CHECK-NEXT: to label [[DIRECT2:%.*]] [label %direct.indirect_crit_edge] +; CHECK: direct.indirect_crit_edge: +; CHECK-NEXT: br label [[INDIRECT]] ; CHECK: direct2: ; CHECK-NEXT: ret i32 0 ; CHECK: indirect: -; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY:%.*]] ], [ [[OUT2]], [[DIRECT]] ] +; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[OUT2]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ] ; CHECK-NEXT: ret i32 [[OUT3]] ; entry: @@ -28,3 +31,193 @@ %out3 = phi i32 [%out, %entry], [%out2, %direct] ret i32 %out3 } + +; Don't split edges unless they are critical, and callbr produces output, and +; that output is used. +; Here we have none of the above. +define i32 @dont_split0() { +; CHECK-LABEL: @dont_split0( +; CHECK-NEXT: entry: +; CHECK-NEXT: callbr void asm "", "!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: y: +; CHECK-NEXT: ret i32 0 +; +entry: + callbr void asm "", "!i"() + to label %x [label %y] + +x: + ret i32 42 + +y: + ret i32 0 +} + +; Don't split edges unless they are critical, and callbr produces output, and +; that output is used. +; Here we have output, but no critical edge. +define i32 @dont_split1() { +; CHECK-LABEL: @dont_split1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: y: +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %0 = callbr i32 asm "", "=r,!i"() + to label %x [label %y] + +x: + ret i32 42 + +y: + ret i32 %0 +} + +; Don't split edges unless they are critical, and callbr produces output, and +; that output is used. +; Here we have a critical edge along an indirect branch, but no output. +define i32 @dont_split2() { +; CHECK-LABEL: @dont_split2( +; CHECK-NEXT: entry: +; CHECK-NEXT: callbr void asm "", "!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: br label [[Y:%.*]] +; CHECK: y: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[X]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + callbr void asm "", "!i"() + to label %x [label %y] + +x: + br label %y + +y: + %0 = phi i32 [ 0, %x ], [ 42, %entry ] + ret i32 %0 +} + +; Don't split edges unless they are critical, and callbr produces output, and +; that output is used. +; Here we're missing a use. +define i32 @dont_split3() { +; CHECK-LABEL: @dont_split3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %v] +; CHECK: x: +; CHECK-NEXT: br label [[V:%.*]] +; CHECK: v: +; CHECK-NEXT: ret i32 42 +; +entry: + %0 = callbr i32 asm "", "=r,!i"() to label %x [label %v] + +x: + br label %v + +v: + ret i32 42 +} + +; Don't split edges unless they are critical, and callbr produces output, and +; that output is used. +; Here we have output and a critical edge along an indirect branch. +define i32 @split_me0() { +; CHECK-LABEL: @split_me0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %entry.y_crit_edge] +; CHECK: entry.y_crit_edge: +; CHECK-NEXT: br label [[Y:%.*]] +; CHECK: x: +; CHECK-NEXT: br label [[Y]] +; CHECK: y: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = callbr i32 asm "", "=r,!i"() + to label %x [label %y] + +x: + br label %y + +y: + %1 = phi i32 [ %0, %entry ], [ 42, %x ] + ret i32 %1 +} + +; Here we have output and a critical edge along an indirect branch. +; Ensure that if we repeat the indirect destination, that we only split it +; once. +define i32 @split_me1(i1 %z) { +; CHECK-LABEL: @split_me1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]] +; CHECK: w: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge] +; CHECK: w.v_crit_edge: +; CHECK-NEXT: br label [[V]] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: v: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + br i1 %z, label %w, label %v + +w: + %0 = callbr i32 asm "", "=r,!i,!i"() + to label %x [label %v, label %v] + +x: + ret i32 42 + +v: + %1 = phi i32 [%0, %w], [%0, %w], [undef, %entry] + ret i32 %1 +} + +; A more interessting case of @split_me1. Check that we still only split the +; critical edge from w to v once. +define i32 @split_me2(i1 %z) { +; CHECK-LABEL: @split_me2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]] +; CHECK: w: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge] +; CHECK: w.v_crit_edge: +; CHECK-NEXT: br label [[V]] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: v: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + br i1 %z, label %w, label %v + +w: + %0 = callbr i32 asm "", "=r,!i,!i"() + to label %x [label %v, label %v] + +x: + ret i32 42 + +v: + %1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ] + ret i32 %1 +}