Index: llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/ImproveReadingOrder.h @@ -0,0 +1,29 @@ +//===- ImproveReadingOrder.h - Improve IR readability -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H +#define LLVM_TRANSFORMS_UTILS_IMPROVEREADINGORDER_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; + +struct ImproveReadingOrder : public PassInfoMixin { + ImproveReadingOrder() = default; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + BasicBlock *preferredNextBlock(SetVector &, + const PostDominatorTree &); +}; + +#endif Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -159,6 +159,7 @@ #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/ImproveReadingOrder.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" @@ -1696,7 +1697,7 @@ return Error::success(); } - // Finally expand the basic registered passes from the .inc file. + // Finally expand the basic registered passes from PassRegistry.def. #define MODULE_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(CREATE_PASS); \ Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -183,6 +183,7 @@ FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn", GVN()) +FUNCTION_PASS("improve-reading-order", ImproveReadingOrder()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -22,6 +22,7 @@ GuardUtils.cpp InlineFunction.cpp ImportedFunctionsInliningStatistics.cpp + ImproveReadingOrder.cpp InstructionNamer.cpp IntegerDivision.cpp LCSSA.cpp Index: llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/ImproveReadingOrder.cpp @@ -0,0 +1,171 @@ +//===- ImproveReadingOrder.cpp - Improve IR readability ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ImproveReadingOrder.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" + +#include + +using namespace llvm; + +#define DEBUG_TYPE "improvereadingorder" + +// Reorder the function's basic blocks so the happy path is near the top and +// the exceptional cases move down. The general order is: Happy path first, then +// in-function error handling, then exception handlers, finally dead code. +// +// The result has exactly the same control flow graph as the input, the only +// difference is that if you need to read IR output, then the blocks that form a +// loop or other group tend to be together in the IR. In code like the +// following, the IR blocks tend to be ordered in order of these lines, even if +// the frontend creates blocks in a totally different order: +// +// if(a) +// for(b : c) +// e(b); +// else +// for(e : f) +// g(a, e); +// +// This makes no difference for the code generators, but can lower the strain on +// a poor brain trying to read IR code. +// +// The order of the basic blocks is based largely on the control flow graph and +// branch weights, although in some cases the order in the input is used. This +// seems to matter mostly for the switch instruction. + +// It never uses source line numbers, because I've found that that helps little +// in the best cases and hurts readability significantly in the worst (when the +// function is based on macros and/or inlining). + +PreservedAnalyses ImproveReadingOrder::run(Function &F, + FunctionAnalysisManager &FAM) { + BasicBlock *lastSoFar = nullptr; + SmallPtrSet processed; + SetVector toBeDone; + SetVector exceptionHandlers; + + SmallPtrSet blocksWithUnreachables; + for (auto &b : F.getBasicBlockList()) + if (isa(b.getTerminator())) + blocksWithUnreachables.insert(&b); + + PostDominatorTree &PDT = FAM.getResult(F); + + BasicBlock *current = &F.getEntryBlock(); + while (current) { + processed.insert(current); + if (lastSoFar) + current->moveAfter(lastSoFar); + lastSoFar = current; + + Instruction *t = current->getTerminator(); + MDNode *branchWeights = t->getMetadata(LLVMContext::MD_prof); + SmallVector successors; + if (isa(t)) { + // Invoke's normal destination is the happy path + InvokeInst *i = cast(t); + successors.push_back(i->getUnwindDest()); + successors.push_back(i->getNormalDest()); + } else if (t->getNumSuccessors() == 2 && + PDT.dominates(t->getSuccessor(0), t->getSuccessor(1))) { + // This is an if-then construction without any else clause, so we prefer + // to start with the 'then' block. + successors.push_back(t->getSuccessor(0)); + successors.push_back(t->getSuccessor(1)); + } else if (t->getNumSuccessors() == 2 && + PDT.dominates(t->getSuccessor(1), t->getSuccessor(0))) { + // This too is an if-then construction, so ditto + successors.push_back(t->getSuccessor(1)); + successors.push_back(t->getSuccessor(0)); + } else if (branchWeights) { + // There are branch weights; we start with the most-likely branches, and + // use successor numbers just for tiebreaking. + assert(branchWeights->getNumOperands() == t->getNumSuccessors() + 1); + std::map, BasicBlock *> ordered; + uint n = 0; + while (n < t->getNumSuccessors()) { + ConstantInt *weight = + mdconst::dyn_extract(branchWeights->getOperand(n + 1)); + assert(weight && "Weight should be a ConstantInt"); + assert(weight->getValue().getActiveBits() <= 32 && + "Too many bits for uint32_t"); + ordered.emplace(std::make_pair((uint)(weight->getZExtValue()), n), + t->getSuccessor(n)); + n++; + } + for (auto it : ordered) + successors.push_back(it.second); + } else { + // There's nothing to base any decision on... we'll start with the + // lowest-numbered successor. This is ungood: Most of switch's arguments + // are an unordered set, so this bases basic block order on an unordered + // aspect of the input syntax rather than on a significant aspect of the + // code. + uint n = t->getNumSuccessors(); + while (n-- > 0) + successors.push_back(t->getSuccessor(n)); + } + BasicBlock *headingTowardsUnreachable = nullptr; + for (auto u : blocksWithUnreachables) + if (!headingTowardsUnreachable && PDT.dominates(u, current)) + headingTowardsUnreachable = u; + for (auto s : successors) { + if (!processed.count(s)) { + bool leadsToUnreachable = false; + for (auto u : blocksWithUnreachables) + if (u != headingTowardsUnreachable && PDT.dominates(u, s)) + leadsToUnreachable = true; + if (leadsToUnreachable) { + exceptionHandlers.insert(s); + } else { + // later insertion has to win over earlier so that inner loops are + // inside outer loops + toBeDone.remove(s); + toBeDone.insert(s); + } + } + } + + while (current && processed.count(current)) { + if (!toBeDone.empty()) + current = preferredNextBlock(toBeDone, PDT); + else if (!exceptionHandlers.empty()) + current = preferredNextBlock(exceptionHandlers, PDT); + else + current = nullptr; + } + } + + return PreservedAnalyses::all(); +} + +BasicBlock * +ImproveReadingOrder::preferredNextBlock(SetVector &input, + const PostDominatorTree &PDT) { + BasicBlock *result = nullptr; + BasicBlock *betterCandidate = input.back(); + while (betterCandidate) { + result = betterCandidate; + betterCandidate = nullptr; + + SmallVector possiblePredecessors; + PDT.getDescendants(result, possiblePredecessors); + for (auto i : possiblePredecessors) { + if (i != result && input.count(i)) { + betterCandidate = i; + break; + } + } + } + input.remove(result); + return result; +} Index: llvm/test/Transforms/Util/improve-reading-order.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Util/improve-reading-order.ll @@ -0,0 +1,109 @@ +; RUN: opt -S -passes improve-reading-order < %s | FileCheck %s + +; test utility/debugging pass that reorders basic block for easier +; reading, without making any functional changes. + +; the function below is typed by hand, with blocks made in the order a +; simplistic frontend might might make them. it's sort of like: +; +; if(bar) { +; } else { +; } +; if(bar) { +; for(int i = 0; i < 10; i++) +; test1(42); +; } +; foo.test1(true); // throws if foo is null +; if(foo) +; test1(foo, false); +; return 1 + +declare void @throw() + +define i32 @test1(i32 %foo, i1 %bar) { +entry: + br label %if1 + +if1: + br i1 0, label %then1, label %else1 + +continue1: + br i1 0, label %then2, label %continue2 + +then1: + br label %continue1 + +else1: + br label %continue1 + +continue2: + %cmp2 = icmp eq i32 %foo, 0 + br i1 %cmp2, label %throw1, label %normal1, !prof !2 + +throw1: + call void @throw(); + unreachable + +normal1: + br i1 %cmp2, label %then4, label %continue4, !prof !2 + +then2: + br label %loop1 + +else2: + br label %continue2 + +loop1: + %acc = phi i32 [ 0, %then2 ], [ %next, %body1 ] + %next = add i32 1, %acc + %cmp1 = icmp eq i32 %next, 10 + br i1 %cmp1, label %body1, label %continue3 + +continue3: + br label %continue2 + +body1: + call i32 @test1(i32 42, i1 0) + br label %loop1 + +then4: + br label %continue4 + +continue4: + ret i32 1 +} + +!1 = !{!"branch_weights", i32 2, i32 1} +!2 = !{!"branch_weights", i32 1, i32 2} + +; CHECK: entry + +; a basic if-then-else gets order matching that description +; CHECK: if1 +; CHECK: then1 +; CHECK: else1 +; CHECK: continue1 + +; a basic if-then places all of the then blocks before the continuing +; block, and a loop head goes before the loop body. +; CHECK: then2 +; CHECK: loop1 +; CHECK: body1 +; CHECK: continue3 + +; a null pointer test generally passes, and the code reads better if +; the jump to the passing code is short, and the exception handler is +; delayed until far down. +; CHECK: continue2 +; CHECK: normal1 + +; an unlikely 'then' branch is best placed before the block it jumps +; to, even if skipping the 'then' is the most common path +; CHECK then4 +; CHECK continue4 + +; exception handlers are pushed towards the end +; CHECK: throw1 + +; dead blocks are at the very end +; CHECK: else2