Index: llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h
@@ -0,0 +1,42 @@
+//===- ImproveReadingOrder.h - Improve IR readability -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H
+#define LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// This pass reorders the basic blocks in each function such that the function
+/// isn't unnecessarily difficult for a human to read.
+///
+/// Frontends tend to create basic blocks in the order they discover that they
+/// need the blocks, which is often not a great order for reading and
+/// understanding the IR output. This pass reorders the blocks such that blocks
+/// that form a loop tend to be near each other, inner loops are inside outer
+/// loops, blocks that form if(a){b}c are ordered a, b, c, and so on.
+///
+/// This pass does not change either the control flow, instruction order or any
+/// instruction. It also doesn't add, remove or merge blocks. Even if it
+/// discovers dead code, the code is left in the function (at the end of the
+/// function, instead of being mixed with live code).
+///
+/// A convenient way to use this pass is:
+///     opt -S -passes improve-reading-order somefile.bc
+struct ImproveReadingOrder : public PassInfoMixin<ImproveReadingOrder> {
+  ImproveReadingOrder() = default;
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // namespace llvm
+
+#endif
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -163,6 +163,7 @@
 #include "llvm/Transforms/Utils/BreakCriticalEdges.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/ImproveReadingOrder.h"
 #include "llvm/Transforms/Utils/LCSSA.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
@@ -1818,7 +1819,7 @@
     return Error::success();
   }
 
-  // Finally expand the basic registered passes from the .inc file.
+  // Finally expand the basic registered passes from PassRegistry.def.
 #define MODULE_PASS(NAME, CREATE_PASS)                                         \
   if (Name == NAME) {                                                          \
     MPM.addPass(CREATE_PASS);                                                  \
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -185,6 +185,7 @@
 FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn", GVN())
+FUNCTION_PASS("improve-reading-order", ImproveReadingOrder())
 FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
 FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
 FUNCTION_PASS("loop-sink", LoopSinkPass())
Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -22,6 +22,7 @@
   GuardUtils.cpp
   InlineFunction.cpp
   ImportedFunctionsInliningStatistics.cpp
+  ImproveReadingOrder.cpp
   InstructionNamer.cpp
   IntegerDivision.cpp
   LCSSA.cpp
Index: llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp
@@ -0,0 +1,177 @@
+//===- ImproveReadingOrder.cpp - Improve IR readability ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ImproveReadingOrder.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "improve-reading-order"
+
+static BasicBlock *preferredNextBlock(SetVector<BasicBlock *> &Input,
+                                      const PostDominatorTree &PDT) {
+  BasicBlock *Result = nullptr;
+  BasicBlock *BetterCandidate = Input.back();
+  while (BetterCandidate) {
+    Result = BetterCandidate;
+    BetterCandidate = nullptr;
+
+    SmallVector<BasicBlock *, 16> PossiblePredecessors;
+    PDT.getDescendants(Result, PossiblePredecessors);
+    for (auto i : PossiblePredecessors) {
+      if (i != Result && Input.count(i)) {
+        BetterCandidate = i;
+        break;
+      }
+    }
+  }
+  Input.remove(Result);
+  return Result;
+}
+
+// Reorder the function's basic blocks so the happy path is near the top and
+// the exceptional cases move down. The general order is: Happy path first, then
+// in-function error handling, then exception handlers, finally dead code.
+//
+// The result has exactly the same control flow graph as the input, the only
+// difference is that if you need to read IR output, then the blocks that form a
+// loop or other group tend to be together in the IR. In code like the
+// following, the IR blocks tend to be ordered in order of these lines, even if
+// the frontend creates blocks in a totally different order:
+//
+//     if(a)
+//       for(b : c)
+//         e(b);
+//     else
+//       for(e : f)
+//         g(a, e);
+//
+// This makes no difference for the code generators, but can lower the strain on
+// a poor brain trying to read IR code.
+//
+// The order of the basic blocks is based largely on the control flow graph and
+// branch weights, although in some cases the order in the input is used. This
+// seems to matter mostly for the switch instruction.
+
+// It never uses source line numbers, because I've found that that helps little
+// in the best cases and hurts readability significantly in the worst (when the
+// function is based on macros and/or inlining).
+
+PreservedAnalyses ImproveReadingOrder::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  BasicBlock *LastSoFar = nullptr;
+  SmallPtrSet<BasicBlock *, 16> Processed;
+  SetVector<BasicBlock *> ToBeDone;
+  SetVector<BasicBlock *> ExceptionHandlers;
+
+  SmallPtrSet<BasicBlock *, 16> BlocksWithUnreachables;
+  for (auto &b : F.getBasicBlockList())
+    if (isa<UnreachableInst>(b.getTerminator()))
+      BlocksWithUnreachables.insert(&b);
+
+  PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+
+  BasicBlock *Current = &F.getEntryBlock();
+  while (Current) {
+    Processed.insert(Current);
+    if (LastSoFar)
+      Current->moveAfter(LastSoFar);
+    LastSoFar = Current;
+
+    Instruction *Terminator = Current->getTerminator();
+    MDNode *BranchWeights = Terminator->getMetadata(LLVMContext::MD_prof);
+    SmallVector<BasicBlock *, 16> Successors;
+    if (isa<InvokeInst>(Terminator)) {
+      // Invoke's normal destination is the happy path
+      InvokeInst *Invoke = cast<InvokeInst>(Terminator);
+      Successors.push_back(Invoke->getUnwindDest());
+      Successors.push_back(Invoke->getNormalDest());
+    } else if (Terminator->getNumSuccessors() == 2 &&
+               PDT.dominates(Terminator->getSuccessor(0),
+                             Terminator->getSuccessor(1))) {
+      // This is an if-then construction without any else clause, so we prefer
+      // to start with the 'then' block.
+      Successors.push_back(Terminator->getSuccessor(0));
+      Successors.push_back(Terminator->getSuccessor(1));
+    } else if (Terminator->getNumSuccessors() == 2 &&
+               PDT.dominates(Terminator->getSuccessor(1),
+                             Terminator->getSuccessor(0))) {
+      // This too is an if-then construction, so ditto
+      Successors.push_back(Terminator->getSuccessor(1));
+      Successors.push_back(Terminator->getSuccessor(0));
+    } else if (BranchWeights) {
+      // There are branch weights; we start with the most-likely branches, and
+      // use successor numbers just for tiebreaking.
+      assert(BranchWeights->getNumOperands() ==
+             Terminator->getNumSuccessors() + 1);
+      std::map<std::pair<unsigned int, unsigned int>, BasicBlock *> Ordered;
+      unsigned int n = 0;
+      while (n < Terminator->getNumSuccessors()) {
+        ConstantInt *Weight =
+            mdconst::dyn_extract<ConstantInt>(BranchWeights->getOperand(n + 1));
+        assert(Weight && "Weight should be a ConstantInt");
+        assert(Weight->getValue().getActiveBits() <= 32 &&
+               "Too many bits for uint32_t");
+        Ordered.emplace(
+            std::make_pair((unsigned int)(Weight->getZExtValue()), n),
+            Terminator->getSuccessor(n));
+        n++;
+      }
+      for (auto it : Ordered)
+        Successors.push_back(it.second);
+    } else {
+      // There's nothing to base any decision on... we'll start with the
+      // lowest-numbered successor. This is ungood: Most of switch's arguments
+      // form an unordered set, so this bases basic block order on an unordered
+      // aspect of the input syntax rather than on a significant aspect of the
+      // code.
+      unsigned int n = Terminator->getNumSuccessors();
+      while (n-- > 0)
+        Successors.push_back(Terminator->getSuccessor(n));
+    }
+    BasicBlock *UnreachablePostDominator = nullptr;
+    for (BasicBlock *BB : BlocksWithUnreachables)
+      if (!UnreachablePostDominator && PDT.dominates(BB, Current))
+        UnreachablePostDominator = BB;
+    // If the current block is post-dominated by a block that ends with
+    // unreachable, then it's best to emit blocks related to that unreachable,
+    // and wait with blocks that have other successors.  (This generally means
+    // that all of the code that's involved in throwing one exception, or one
+    // kind of exception, goes together.)
+    for (auto s : Successors) {
+      if (!Processed.count(s)) {
+        bool LeadsToOtherUnreachable = false;
+        for (auto u : BlocksWithUnreachables)
+          if (u != UnreachablePostDominator && PDT.dominates(u, s))
+            LeadsToOtherUnreachable = true;
+        if (LeadsToOtherUnreachable) {
+          ExceptionHandlers.insert(s);
+        } else {
+          // Later insertion has to win over earlier so that inner loops are
+          // inside outer loops.
+          ToBeDone.remove(s);
+          ToBeDone.insert(s);
+        }
+      }
+    }
+
+    while (Current && Processed.count(Current)) {
+      if (!ToBeDone.empty())
+        Current = preferredNextBlock(ToBeDone, PDT);
+      else if (!ExceptionHandlers.empty())
+        Current = preferredNextBlock(ExceptionHandlers, PDT);
+      else
+        Current = nullptr;
+    }
+  }
+
+  return PreservedAnalyses::all();
+}
Index: llvm/test/Transforms/Util/improve-reading-order.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Util/improve-reading-order.ll
@@ -0,0 +1,109 @@
+; RUN: opt -S -passes improve-reading-order < %s | FileCheck %s
+
+; test utility/debugging pass that reorders basic block for easier
+; reading, without making any functional changes.
+
+; the function below is typed by hand, with blocks made in the order a
+; simplistic frontend might might make them. it's sort of like:
+;
+;  if(bar) {
+;  } else {
+;  }
+;  if(bar) {
+;    for(int i = 0; i < 10; i++)
+;      test1(42);
+;  }
+;  foo.test1(true); // throws if foo is null
+;  if(foo)
+;    test1(foo, false);
+;  return 1
+
+declare void @throw()
+
+define i32 @test1(i32 %foo, i1 %bar) {
+entry:
+  br label %if1
+
+if1:
+  br i1 0, label %then1, label %else1
+
+continue1:
+  br i1 0, label %then2, label %continue2
+
+then1:
+  br label %continue1
+
+else1:
+  br label %continue1
+
+continue2:
+  %cmp2 = icmp eq i32 %foo, 0
+  br i1 %cmp2, label %throw1, label %normal1, !prof !2
+
+throw1:
+  call void @throw();
+  unreachable
+
+normal1:
+  br i1 %cmp2, label %then4, label %continue4, !prof !2
+
+then2:
+  br label %loop1
+
+else2:
+  br label %continue2
+
+loop1:
+  %acc = phi i32 [ 0, %then2 ], [ %next, %body1 ]
+  %next = add i32 1, %acc
+  %cmp1 = icmp eq i32 %next, 10
+  br i1 %cmp1, label %body1, label %continue3
+
+continue3:
+  br label %continue2
+
+body1:
+  call i32 @test1(i32 42, i1 0)
+  br label %loop1
+
+then4:
+  br label %continue4
+
+continue4:
+  ret i32 1
+}
+
+!1 = !{!"branch_weights", i32 2, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 2}
+
+; CHECK: entry
+
+; a basic if-then-else gets order matching that description
+; CHECK: if1
+; CHECK: then1
+; CHECK: else1
+; CHECK: continue1
+
+; a basic if-then places all of the then blocks before the continuing
+; block, and a loop head goes before the loop body.
+; CHECK: then2
+; CHECK: loop1
+; CHECK: body1
+; CHECK: continue3
+
+; a null pointer test generally passes, and the code reads better if
+; the jump to the passing code is short, and the exception handler is
+; delayed until far down.
+; CHECK: continue2
+; CHECK: normal1
+
+; an unlikely 'then' branch is best placed before the block it jumps
+; to, even if skipping the 'then' is the most common path
+; CHECK then4
+; CHECK continue4
+
+; exception handlers are pushed towards the end
+; CHECK: throw1
+
+; dead blocks are at the very end
+; CHECK: else2