Index: llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h
@@ -0,0 +1,29 @@
+//===- ImproveReadingOrder.h - Improve IR readability -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H
+#define LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+struct ImproveReadingOrder : public PassInfoMixin<ImproveReadingOrder> {
+  ImproveReadingOrder() = default;
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+  BasicBlock *preferredNextBlock(SetVector<BasicBlock *> &,
+                                 const PostDominatorTree &);
+};
+
+#endif
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -159,6 +159,7 @@
 #include "llvm/Transforms/Utils/BreakCriticalEdges.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/ImproveReadingOrder.h"
 #include "llvm/Transforms/Utils/LCSSA.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
@@ -1696,7 +1697,7 @@
     return Error::success();
   }
 
-  // Finally expand the basic registered passes from the .inc file.
+  // Finally expand the basic registered passes from PassRegistry.def.
 #define MODULE_PASS(NAME, CREATE_PASS)                                         \
   if (Name == NAME) {                                                          \
     MPM.addPass(CREATE_PASS);                                                  \
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -183,6 +183,7 @@
 FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn", GVN())
+FUNCTION_PASS("improve-reading-order", ImproveReadingOrder())
 FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
 FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
 FUNCTION_PASS("loop-sink", LoopSinkPass())
Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -22,6 +22,7 @@
   GuardUtils.cpp
   InlineFunction.cpp
   ImportedFunctionsInliningStatistics.cpp
+  ImproveReadingOrder.cpp
   InstructionNamer.cpp
   IntegerDivision.cpp
   LCSSA.cpp
Index: llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp
@@ -0,0 +1,171 @@
+//===- ImproveReadingOrder.cpp - Improve IR readability ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ImproveReadingOrder.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+
+#include <set>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "improvereadingorder"
+
+// Reorder the function's basic blocks so the happy path is near the top and
+// the exceptional cases move down. The general order is: Happy path first, then
+// in-function error handling, then exception handlers, finally dead code.
+//
+// The result has exactly the same control flow graph as the input, the only
+// difference is that if you need to read IR output, then the blocks that form a
+// loop or other group tend to be together in the IR. In code like the
+// following, the IR blocks tend to be ordered in order of these lines, even if
+// the frontend creates blocks in a totally different order:
+//
+//     if(a)
+//       for(b : c)
+//         e(b);
+//     else
+//       for(e : f)
+//         g(a, e);
+//
+// This makes no difference for the code generators, but can lower the strain on
+// a poor brain trying to read IR code.
+//
+// The order of the basic blocks is based largely on the control flow graph and
+// branch weights, although in some cases the order in the input is used. This
+// seems to matter mostly for the switch instruction.
+
+// It never uses source line numbers, because I've found that that helps little
+// in the best cases and hurts readability significantly in the worst (when the
+// function is based on macros and/or inlining).
+
+PreservedAnalyses ImproveReadingOrder::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  BasicBlock *lastSoFar = nullptr;
+  SmallPtrSet<BasicBlock *, 16> processed;
+  SetVector<BasicBlock *> toBeDone;
+  SetVector<BasicBlock *> exceptionHandlers;
+
+  SmallPtrSet<BasicBlock *, 16> blocksWithUnreachables;
+  for (auto &b : F.getBasicBlockList())
+    if (isa<UnreachableInst>(b.getTerminator()))
+      blocksWithUnreachables.insert(&b);
+
+  PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+
+  BasicBlock *current = &F.getEntryBlock();
+  while (current) {
+    processed.insert(current);
+    if (lastSoFar)
+      current->moveAfter(lastSoFar);
+    lastSoFar = current;
+
+    Instruction *t = current->getTerminator();
+    MDNode *branchWeights = t->getMetadata(LLVMContext::MD_prof);
+    SmallVector<BasicBlock *, 16> successors;
+    if (isa<InvokeInst>(t)) {
+      // Invoke's normal destination is the happy path
+      InvokeInst *i = cast<InvokeInst>(t);
+      successors.push_back(i->getUnwindDest());
+      successors.push_back(i->getNormalDest());
+    } else if (t->getNumSuccessors() == 2 &&
+               PDT.dominates(t->getSuccessor(0), t->getSuccessor(1))) {
+      // This is an if-then construction without any else clause, so we prefer
+      // to start with the 'then' block.
+      successors.push_back(t->getSuccessor(0));
+      successors.push_back(t->getSuccessor(1));
+    } else if (t->getNumSuccessors() == 2 &&
+               PDT.dominates(t->getSuccessor(1), t->getSuccessor(0))) {
+      // This too is an if-then construction, so ditto
+      successors.push_back(t->getSuccessor(1));
+      successors.push_back(t->getSuccessor(0));
+    } else if (branchWeights) {
+      // There are branch weights; we start with the most-likely branches, and
+      // use successor numbers just for tiebreaking.
+      assert(branchWeights->getNumOperands() == t->getNumSuccessors() + 1);
+      std::map<std::pair<uint, uint>, BasicBlock *> ordered;
+      uint n = 0;
+      while (n < t->getNumSuccessors()) {
+        ConstantInt *weight =
+            mdconst::dyn_extract<ConstantInt>(branchWeights->getOperand(n + 1));
+        assert(weight && "Weight should be a ConstantInt");
+        assert(weight->getValue().getActiveBits() <= 32 &&
+               "Too many bits for uint32_t");
+        ordered.emplace(std::make_pair((uint)(weight->getZExtValue()), n),
+                        t->getSuccessor(n));
+        n++;
+      }
+      for (auto it : ordered)
+        successors.push_back(it.second);
+    } else {
+      // There's nothing to base any decision on... we'll start with the
+      // lowest-numbered successor. This is ungood: Most of switch's arguments
+      // are an unordered set, so this bases basic block order on an unordered
+      // aspect of the input syntax rather than on a significant aspect of the
+      // code.
+      uint n = t->getNumSuccessors();
+      while (n-- > 0)
+        successors.push_back(t->getSuccessor(n));
+    }
+    BasicBlock *headingTowardsUnreachable = nullptr;
+    for (auto u : blocksWithUnreachables)
+      if (!headingTowardsUnreachable && PDT.dominates(u, current))
+        headingTowardsUnreachable = u;
+    for (auto s : successors) {
+      if (!processed.count(s)) {
+        bool leadsToUnreachable = false;
+        for (auto u : blocksWithUnreachables)
+          if (u != headingTowardsUnreachable && PDT.dominates(u, s))
+            leadsToUnreachable = true;
+        if (leadsToUnreachable) {
+          exceptionHandlers.insert(s);
+        } else {
+          // later insertion has to win over earlier so that inner loops are
+          // inside outer loops
+          toBeDone.remove(s);
+          toBeDone.insert(s);
+        }
+      }
+    }
+
+    while (current && processed.count(current)) {
+      if (!toBeDone.empty())
+        current = preferredNextBlock(toBeDone, PDT);
+      else if (!exceptionHandlers.empty())
+        current = preferredNextBlock(exceptionHandlers, PDT);
+      else
+        current = nullptr;
+    }
+  }
+
+  return PreservedAnalyses::all();
+}
+
+BasicBlock *
+ImproveReadingOrder::preferredNextBlock(SetVector<BasicBlock *> &input,
+                                        const PostDominatorTree &PDT) {
+  BasicBlock *result = nullptr;
+  BasicBlock *betterCandidate = input.back();
+  while (betterCandidate) {
+    result = betterCandidate;
+    betterCandidate = nullptr;
+
+    SmallVector<BasicBlock *, 16> possiblePredecessors;
+    PDT.getDescendants(result, possiblePredecessors);
+    for (auto i : possiblePredecessors) {
+      if (i != result && input.count(i)) {
+        betterCandidate = i;
+        break;
+      }
+    }
+  }
+  input.remove(result);
+  return result;
+}
Index: llvm/test/Transforms/Util/improve-reading-order.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Util/improve-reading-order.ll
@@ -0,0 +1,109 @@
+; RUN: opt -S -passes improve-reading-order < %s | FileCheck %s
+
+; test utility/debugging pass that reorders basic block for easier
+; reading, without making any functional changes.
+
+; the function below is typed by hand, with blocks made in the order a
+; simplistic frontend might might make them. it's sort of like:
+;
+;  if(bar) {
+;  } else {
+;  }
+;  if(bar) {
+;    for(int i = 0; i < 10; i++)
+;      test1(42);
+;  }
+;  foo.test1(true); // throws if foo is null
+;  if(foo)
+;    test1(foo, false);
+;  return 1
+
+declare void @throw()
+
+define i32 @test1(i32 %foo, i1 %bar) {
+entry:
+  br label %if1
+
+if1:
+  br i1 0, label %then1, label %else1
+
+continue1:
+  br i1 0, label %then2, label %continue2
+
+then1:
+  br label %continue1
+
+else1:
+  br label %continue1
+
+continue2:
+  %cmp2 = icmp eq i32 %foo, 0
+  br i1 %cmp2, label %throw1, label %normal1, !prof !2
+
+throw1:
+  call void @throw();
+  unreachable
+
+normal1:
+  br i1 %cmp2, label %then4, label %continue4, !prof !2
+
+then2:
+  br label %loop1
+
+else2:
+  br label %continue2
+
+loop1:
+  %acc = phi i32 [ 0, %then2 ], [ %next, %body1 ]
+  %next = add i32 1, %acc
+  %cmp1 = icmp eq i32 %next, 10
+  br i1 %cmp1, label %body1, label %continue3
+
+continue3:
+  br label %continue2
+
+body1:
+  call i32 @test1(i32 42, i1 0)
+  br label %loop1
+
+then4:
+  br label %continue4
+
+continue4:
+  ret i32 1
+}
+
+!1 = !{!"branch_weights", i32 2, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 2}
+
+; CHECK: entry
+
+; a basic if-then-else gets order matching that description
+; CHECK: if1
+; CHECK: then1
+; CHECK: else1
+; CHECK: continue1
+
+; a basic if-then places all of the then blocks before the continuing
+; block, and a loop head goes before the loop body.
+; CHECK: then2
+; CHECK: loop1
+; CHECK: body1
+; CHECK: continue3
+
+; a null pointer test generally passes, and the code reads better if
+; the jump to the passing code is short, and the exception handler is
+; delayed until far down.
+; CHECK: continue2
+; CHECK: normal1
+
+; an unlikely 'then' branch is best placed before the block it jumps
+; to, even if skipping the 'then' is the most common path
+; CHECK then4
+; CHECK continue4
+
+; exception handlers are pushed towards the end
+; CHECK: throw1
+
+; dead blocks are at the very end
+; CHECK: else2