Index: llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h @@ -0,0 +1,42 @@ +//===- ImproveReadingOrder.h - Improve IR readability -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H +#define LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// This pass reorders the basic blocks in each function such that the function +/// isn't unnecessarily difficult for a human to read. +/// +/// Frontends tend to create basic blocks in the order they discover that they +/// need the blocks, which is often not a great order for reading and +/// understanding the IR output. This pass reorders the blocks such that blocks +/// that form a loop tend to be near each other, inner loops are inside outer +/// loops, blocks that form if(a){b}c are ordered a, b, c, and so on. +/// +/// This pass does not change either the control flow, instruction order or any +/// instruction. It also doesn't add, remove or merge blocks. Even if it +/// discovers dead code, the code is left in the function (at the end of the +/// function, instead of being mixed with live code). +/// +/// A convenient way to use this pass is: +/// opt -S -passes improve-reading-order somefile.bc +struct ImproveReadingOrder : public PassInfoMixin { + ImproveReadingOrder() = default; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; +} // namespace llvm + +#endif Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -163,6 +163,7 @@ #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/ImproveReadingOrder.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" @@ -1818,7 +1819,7 @@ return Error::success(); } - // Finally expand the basic registered passes from the .inc file. + // Finally expand the basic registered passes from PassRegistry.def. #define MODULE_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(CREATE_PASS); \ Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -185,6 +185,7 @@ FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn", GVN()) +FUNCTION_PASS("improve-reading-order", ImproveReadingOrder()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -22,6 +22,7 @@ GuardUtils.cpp InlineFunction.cpp ImportedFunctionsInliningStatistics.cpp + ImproveReadingOrder.cpp InstructionNamer.cpp IntegerDivision.cpp LCSSA.cpp Index: llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp @@ -0,0 +1,177 @@ +//===- ImproveReadingOrder.cpp - Improve IR readability ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ImproveReadingOrder.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "improve-reading-order" + +static BasicBlock *preferredNextBlock(SetVector &Input, + const PostDominatorTree &PDT) { + BasicBlock *Result = nullptr; + BasicBlock *BetterCandidate = Input.back(); + while (BetterCandidate) { + Result = BetterCandidate; + BetterCandidate = nullptr; + + SmallVector PossiblePredecessors; + PDT.getDescendants(Result, PossiblePredecessors); + for (auto i : PossiblePredecessors) { + if (i != Result && Input.count(i)) { + BetterCandidate = i; + break; + } + } + } + Input.remove(Result); + return Result; +} + +// Reorder the function's basic blocks so the happy path is near the top and +// the exceptional cases move down. The general order is: Happy path first, then +// in-function error handling, then exception handlers, finally dead code. +// +// The result has exactly the same control flow graph as the input, the only +// difference is that if you need to read IR output, then the blocks that form a +// loop or other group tend to be together in the IR. In code like the +// following, the IR blocks tend to be ordered in order of these lines, even if +// the frontend creates blocks in a totally different order: +// +// if(a) +// for(b : c) +// e(b); +// else +// for(e : f) +// g(a, e); +// +// This makes no difference for the code generators, but can lower the strain on +// a poor brain trying to read IR code. +// +// The order of the basic blocks is based largely on the control flow graph and +// branch weights, although in some cases the order in the input is used. This +// seems to matter mostly for the switch instruction. + +// It never uses source line numbers, because I've found that that helps little +// in the best cases and hurts readability significantly in the worst (when the +// function is based on macros and/or inlining). + +PreservedAnalyses ImproveReadingOrder::run(Function &F, + FunctionAnalysisManager &FAM) { + BasicBlock *LastSoFar = nullptr; + SmallPtrSet Processed; + SetVector ToBeDone; + SetVector ExceptionHandlers; + + SmallPtrSet BlocksWithUnreachables; + for (auto &b : F.getBasicBlockList()) + if (isa(b.getTerminator())) + BlocksWithUnreachables.insert(&b); + + PostDominatorTree &PDT = FAM.getResult(F); + + BasicBlock *Current = &F.getEntryBlock(); + while (Current) { + Processed.insert(Current); + if (LastSoFar) + Current->moveAfter(LastSoFar); + LastSoFar = Current; + + Instruction *Terminator = Current->getTerminator(); + MDNode *BranchWeights = Terminator->getMetadata(LLVMContext::MD_prof); + SmallVector Successors; + if (isa(Terminator)) { + // Invoke's normal destination is the happy path + InvokeInst *Invoke = cast(Terminator); + Successors.push_back(Invoke->getUnwindDest()); + Successors.push_back(Invoke->getNormalDest()); + } else if (Terminator->getNumSuccessors() == 2 && + PDT.dominates(Terminator->getSuccessor(0), + Terminator->getSuccessor(1))) { + // This is an if-then construction without any else clause, so we prefer + // to start with the 'then' block. + Successors.push_back(Terminator->getSuccessor(0)); + Successors.push_back(Terminator->getSuccessor(1)); + } else if (Terminator->getNumSuccessors() == 2 && + PDT.dominates(Terminator->getSuccessor(1), + Terminator->getSuccessor(0))) { + // This too is an if-then construction, so ditto + Successors.push_back(Terminator->getSuccessor(1)); + Successors.push_back(Terminator->getSuccessor(0)); + } else if (BranchWeights) { + // There are branch weights; we start with the most-likely branches, and + // use successor numbers just for tiebreaking. + assert(BranchWeights->getNumOperands() == + Terminator->getNumSuccessors() + 1); + std::map, BasicBlock *> Ordered; + unsigned int n = 0; + while (n < Terminator->getNumSuccessors()) { + ConstantInt *Weight = + mdconst::dyn_extract(BranchWeights->getOperand(n + 1)); + assert(Weight && "Weight should be a ConstantInt"); + assert(Weight->getValue().getActiveBits() <= 32 && + "Too many bits for uint32_t"); + Ordered.emplace( + std::make_pair((unsigned int)(Weight->getZExtValue()), n), + Terminator->getSuccessor(n)); + n++; + } + for (auto it : Ordered) + Successors.push_back(it.second); + } else { + // There's nothing to base any decision on... we'll start with the + // lowest-numbered successor. This is ungood: Most of switch's arguments + // form an unordered set, so this bases basic block order on an unordered + // aspect of the input syntax rather than on a significant aspect of the + // code. + unsigned int n = Terminator->getNumSuccessors(); + while (n-- > 0) + Successors.push_back(Terminator->getSuccessor(n)); + } + BasicBlock *UnreachablePostDominator = nullptr; + for (BasicBlock *BB : BlocksWithUnreachables) + if (!UnreachablePostDominator && PDT.dominates(BB, Current)) + UnreachablePostDominator = BB; + // If the current block is post-dominated by a block that ends with + // unreachable, then it's best to emit blocks related to that unreachable, + // and wait with blocks that have other successors. (This generally means + // that all of the code that's involved in throwing one exception, or one + // kind of exception, goes together.) + for (auto s : Successors) { + if (!Processed.count(s)) { + bool LeadsToOtherUnreachable = false; + for (auto u : BlocksWithUnreachables) + if (u != UnreachablePostDominator && PDT.dominates(u, s)) + LeadsToOtherUnreachable = true; + if (LeadsToOtherUnreachable) { + ExceptionHandlers.insert(s); + } else { + // Later insertion has to win over earlier so that inner loops are + // inside outer loops. + ToBeDone.remove(s); + ToBeDone.insert(s); + } + } + } + + while (Current && Processed.count(Current)) { + if (!ToBeDone.empty()) + Current = preferredNextBlock(ToBeDone, PDT); + else if (!ExceptionHandlers.empty()) + Current = preferredNextBlock(ExceptionHandlers, PDT); + else + Current = nullptr; + } + } + + return PreservedAnalyses::all(); +} Index: llvm/test/Transforms/Util/improve-reading-order.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Util/improve-reading-order.ll @@ -0,0 +1,109 @@ +; RUN: opt -S -passes improve-reading-order < %s | FileCheck %s + +; test utility/debugging pass that reorders basic block for easier +; reading, without making any functional changes. + +; the function below is typed by hand, with blocks made in the order a +; simplistic frontend might might make them. it's sort of like: +; +; if(bar) { +; } else { +; } +; if(bar) { +; for(int i = 0; i < 10; i++) +; test1(42); +; } +; foo.test1(true); // throws if foo is null +; if(foo) +; test1(foo, false); +; return 1 + +declare void @throw() + +define i32 @test1(i32 %foo, i1 %bar) { +entry: + br label %if1 + +if1: + br i1 0, label %then1, label %else1 + +continue1: + br i1 0, label %then2, label %continue2 + +then1: + br label %continue1 + +else1: + br label %continue1 + +continue2: + %cmp2 = icmp eq i32 %foo, 0 + br i1 %cmp2, label %throw1, label %normal1, !prof !2 + +throw1: + call void @throw(); + unreachable + +normal1: + br i1 %cmp2, label %then4, label %continue4, !prof !2 + +then2: + br label %loop1 + +else2: + br label %continue2 + +loop1: + %acc = phi i32 [ 0, %then2 ], [ %next, %body1 ] + %next = add i32 1, %acc + %cmp1 = icmp eq i32 %next, 10 + br i1 %cmp1, label %body1, label %continue3 + +continue3: + br label %continue2 + +body1: + call i32 @test1(i32 42, i1 0) + br label %loop1 + +then4: + br label %continue4 + +continue4: + ret i32 1 +} + +!1 = !{!"branch_weights", i32 2, i32 1} +!2 = !{!"branch_weights", i32 1, i32 2} + +; CHECK: entry + +; a basic if-then-else gets order matching that description +; CHECK: if1 +; CHECK: then1 +; CHECK: else1 +; CHECK: continue1 + +; a basic if-then places all of the then blocks before the continuing +; block, and a loop head goes before the loop body. +; CHECK: then2 +; CHECK: loop1 +; CHECK: body1 +; CHECK: continue3 + +; a null pointer test generally passes, and the code reads better if +; the jump to the passing code is short, and the exception handler is +; delayed until far down. +; CHECK: continue2 +; CHECK: normal1 + +; an unlikely 'then' branch is best placed before the block it jumps +; to, even if skipping the 'then' is the most common path +; CHECK then4 +; CHECK continue4 + +; exception handlers are pushed towards the end +; CHECK: throw1 + +; dead blocks are at the very end +; CHECK: else2