diff --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp
--- a/llvm/lib/CodeGen/CallBrPrepare.cpp
+++ b/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -22,12 +22,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
@@ -36,31 +42,105 @@
 namespace {
 
 class CallBrPrepare : public FunctionPass {
+  bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+
 public:
   CallBrPrepare() : FunctionPass(ID) {}
-  static char ID;
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnFunction(Function &Fn) override;
+  static char ID;
 };
 
 } // end anonymous namespace
 
 char CallBrPrepare::ID = 0;
-INITIALIZE_PASS(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
 
 FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
 
 void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
+  AU.addPreserved<DominatorTreeWrapperPass>();
 }
 
-bool CallBrPrepare::runOnFunction(Function &Fn) {
+static bool ShouldRun(const CallBrInst *CBR) {
+  return CBR && !CBR->getType()->isVoidTy() && !CBR->use_empty();
+}
+
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
+  SmallVector<CallBrInst *, 2> CBRs;
   for (BasicBlock &BB : Fn) {
     auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator());
-    if (!CBR)
-      continue;
-    // TODO: something interesting.
-    // https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8
+    if (ShouldRun(CBR))
+      CBRs.push_back(CBR);
   }
-  return false;
+  return CBRs;
+}
+
+bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
+                                       DominatorTree &DT) {
+  bool Changed = false;
+  CriticalEdgeSplittingOptions Options(&DT);
+  Options.setMergeIdenticalEdges();
+
+  for (CallBrInst *CBR : CBRs) {
+    SmallPtrSet<const BasicBlock *, 4> Visited;
+    SmallVector<unsigned, 4> UniqueCriticalSuccessorIndices;
+
+    // Do one pass to find the indices of unique indirect successors BEFORE
+    // attempting to split critical edges (otherwise the BasicBlock inserted
+    // in the Visited Set below will not catch duplicates; it would get
+    // updated to the new synthetic block while iterating this loop). Start
+    // at index 1 to avoid the default destination.
+    for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) {
+      // The same destination may appear multiple times in a callbr
+      // indirect label list. Also, a phi may list the same predecessor
+      // multiple times as long as they use the same value.
+      //
+      // The CriticalEdgeSplittingOptions option MergeIdenticalEdges will
+      // handle this properly for us, but we don't want to call
+      // SplitCriticalEdge on such repeated destinations more than once.
+      if (!Visited.insert(CBR->getSuccessor(i)).second)
+        continue;
+      if (isCriticalEdge(CBR, i))
+        UniqueCriticalSuccessorIndices.push_back(i);
+    }
+    for (unsigned i : UniqueCriticalSuccessorIndices) {
+      BasicBlock *Synth = SplitKnownCriticalEdge(CBR, i, Options);
+      assert(Synth && "Failed to split a known critical edge from callbr");
+      if (Synth)
+        Changed = true;
+    }
+  }
+  return Changed;
+}
+
+bool CallBrPrepare::runOnFunction(Function &Fn) {
+  bool Changed = false;
+  SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
+
+  if (CBRs.empty())
+    return Changed;
+
+  // It's highly likely that most programs do not contain CallBrInsts. Follow a
+  // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
+  // domtree analysis if available, otherwise compute it lazily. This avoids
+  // forcing Dominator Tree Construction at -O0 for programs that likely do not
+  // contain CallBrInsts. It does pessimize programs with callbr at higher
+  // optimization levels, as the DominatorTree created here is not reused by
+  // subsequent passes.
+  DominatorTree *DT;
+  std::optional<DominatorTree> LazilyComputedDomTree;
+  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+    DT = &DTWP->getDomTree();
+  else {
+    LazilyComputedDomTree.emplace(Fn);
+    DT = &*LazilyComputedDomTree;
+  }
+
+  if (SplitCriticalEdges(CBRs, *DT))
+    Changed = true;
+
+  return Changed;
 }
diff --git a/llvm/test/CodeGen/AArch64/callbr-prepare.ll b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
--- a/llvm/test/CodeGen/AArch64/callbr-prepare.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
@@ -1,19 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -callbrprepare -S -o - | FileCheck %s
 
-; TODO: update this test to split critical edges.
 define i32 @test0() {
 ; CHECK-LABEL: @test0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[OUT:%.*]] = callbr i32 asm "# $0", "=r,!i"()
-; CHECK-NEXT:    to label [[DIRECT:%.*]] [label %indirect]
+; CHECK-NEXT:    to label [[DIRECT:%.*]] [label %entry.indirect_crit_edge]
+; CHECK:       entry.indirect_crit_edge:
+; CHECK-NEXT:    br label [[INDIRECT:%.*]]
 ; CHECK:       direct:
 ; CHECK-NEXT:    [[OUT2:%.*]] = callbr i32 asm "# $0", "=r,!i"()
-; CHECK-NEXT:    to label [[DIRECT2:%.*]] [label %indirect]
+; CHECK-NEXT:    to label [[DIRECT2:%.*]] [label %direct.indirect_crit_edge]
+; CHECK:       direct.indirect_crit_edge:
+; CHECK-NEXT:    br label [[INDIRECT]]
 ; CHECK:       direct2:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       indirect:
-; CHECK-NEXT:    [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY:%.*]] ], [ [[OUT2]], [[DIRECT]] ]
+; CHECK-NEXT:    [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[OUT2]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[OUT3]]
 ;
 entry:
@@ -28,3 +31,193 @@
   %out3 = phi i32 [%out, %entry], [%out2, %direct]
   ret i32 %out3
 }
+
+; Don't split edges unless they are critical, and callbr produces output, and
+; that output is used.
+; Here we have none of the above.
+define i32 @dont_split0() {
+; CHECK-LABEL: @dont_split0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    callbr void asm "", "!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %y]
+; CHECK:       x:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       y:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  callbr void asm "", "!i"()
+  to label %x [label %y]
+
+x:
+  ret i32 42
+
+y:
+  ret i32 0
+}
+
+; Don't split edges unless they are critical, and callbr produces output, and
+; that output is used.
+; Here we have output, but no critical edge.
+define i32 @dont_split1() {
+; CHECK-LABEL: @dont_split1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %y]
+; CHECK:       x:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       y:
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %x [label %y]
+
+x:
+  ret i32 42
+
+y:
+  ret i32 %0
+}
+
+; Don't split edges unless they are critical, and callbr produces output, and
+; that output is used.
+; Here we have a critical edge along an indirect branch, but no output.
+define i32 @dont_split2() {
+; CHECK-LABEL: @dont_split2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    callbr void asm "", "!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %y]
+; CHECK:       x:
+; CHECK-NEXT:    br label [[Y:%.*]]
+; CHECK:       y:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[X]] ], [ 42, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  callbr void asm "", "!i"()
+  to label %x [label %y]
+
+x:
+  br label %y
+
+y:
+  %0 = phi i32 [ 0, %x ], [ 42, %entry ]
+  ret i32 %0
+}
+
+; Don't split edges unless they are critical, and callbr produces output, and
+; that output is used.
+; Here we're missing a use.
+define i32 @dont_split3() {
+; CHECK-LABEL: @dont_split3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %v]
+; CHECK:       x:
+; CHECK-NEXT:    br label [[V:%.*]]
+; CHECK:       v:
+; CHECK-NEXT:    ret i32 42
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"() to label %x [label %v]
+
+x:
+  br label %v
+
+v:
+  ret i32 42
+}
+
+; Don't split edges unless they are critical, and callbr produces output, and
+; that output is used.
+; Here we have output and a critical edge along an indirect branch.
+define i32 @split_me0() {
+; CHECK-LABEL: @split_me0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %entry.y_crit_edge]
+; CHECK:       entry.y_crit_edge:
+; CHECK-NEXT:    br label [[Y:%.*]]
+; CHECK:       x:
+; CHECK-NEXT:    br label [[Y]]
+; CHECK:       y:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %x [label %y]
+
+x:
+  br label %y
+
+y:
+  %1 = phi i32 [ %0, %entry ], [ 42, %x ]
+  ret i32 %1
+}
+
+; Here we have output and a critical edge along an indirect branch.
+; Ensure that if we repeat the indirect destination, that we only split it
+; once.
+define i32 @split_me1(i1 %z) {
+; CHECK-LABEL: @split_me1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]]
+; CHECK:       w:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge]
+; CHECK:       w.v_crit_edge:
+; CHECK-NEXT:    br label [[V]]
+; CHECK:       x:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       v:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br i1 %z, label %w, label %v
+
+w:
+  %0 = callbr i32 asm "", "=r,!i,!i"()
+  to label %x [label %v, label %v]
+
+x:
+  ret i32 42
+
+v:
+  %1 = phi i32 [%0, %w], [%0, %w], [undef, %entry]
+  ret i32 %1
+}
+
+; A more interessting case of @split_me1. Check that we still only split the
+; critical edge from w to v once.
+define i32 @split_me2(i1 %z) {
+; CHECK-LABEL: @split_me2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]]
+; CHECK:       w:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge]
+; CHECK:       w.v_crit_edge:
+; CHECK-NEXT:    br label [[V]]
+; CHECK:       x:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       v:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br i1 %z, label %w, label %v
+
+w:
+  %0 = callbr i32 asm "", "=r,!i,!i"()
+  to label %x [label %v, label %v]
+
+x:
+  ret i32 42
+
+v:
+  %1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ]
+  ret i32 %1
+}