diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -295,20 +295,23 @@ /// Perform simple peephole optimizations. class Peepholes : public BinaryFunctionPass { +public: + enum PeepholeOpts : char { + PEEP_NONE = 0x0, + PEEP_DOUBLE_JUMPS = 0x2, + PEEP_TAILCALL_TRAPS = 0x4, + PEEP_ALL = 0xf + }; + +private: uint64_t NumDoubleJumps{0}; uint64_t TailCallTraps{0}; - uint64_t NumUselessCondBranches{0}; /// Add trap instructions immediately after indirect tail calls to prevent /// the processor from decoding instructions immediate following the /// tailcall. void addTailcallTraps(BinaryFunction &Function); - /// Remove useless duplicate successors. When the conditional - /// successor is the same as the unconditional successor, we can - /// remove the conditional successor and branch instruction. - void removeUselessCondBranches(BinaryFunction &Function); - public: explicit Peepholes(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -105,29 +105,17 @@ cl::Hidden, cl::cat(BoltOptCategory)); -enum PeepholeOpts : char { - PEEP_NONE = 0x0, - PEEP_DOUBLE_JUMPS = 0x2, - PEEP_TAILCALL_TRAPS = 0x4, - PEEP_USELESS_BRANCHES = 0x8, - PEEP_ALL = 0xf -}; - -static cl::list -Peepholes("peepholes", - cl::CommaSeparated, - cl::desc("enable peephole optimizations"), - cl::value_desc("opt1,opt2,opt3,..."), - cl::values( - clEnumValN(PEEP_NONE, "none", "disable peepholes"), - clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps", - "remove double jumps when able"), - clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"), - clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches", - "remove useless conditional branches"), - clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::list Peepholes( + "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"), + cl::value_desc("opt1,opt2,opt3,..."), + cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"), + clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps", + "remove double jumps when able"), + clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps", + "insert tail call traps"), + clEnumValN(Peepholes::PEEP_ALL, "all", + "enable all peephole optimizations")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt PrintFuncStat("print-function-statistics", @@ -664,7 +652,7 @@ // B0: ... // jmp B2 (or jcc B2) // -uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) { +uint64_t fixDoubleJumps(BinaryFunction &Function, bool FixBranches) { uint64_t NumDoubleJumps = 0; MCContext *Ctx = Function.getBinaryContext().Ctx.get(); @@ -754,12 +742,17 @@ if (Pred->getSuccessor() == &BB || (Pred->getConditionalSuccessor(true) == &BB && !IsTailCall) || Pred->getConditionalSuccessor(false) == &BB) - if (checkAndPatch(Pred, Succ, SuccSym) && MarkInvalid) + if (checkAndPatch(Pred, Succ, SuccSym)) BB.markValid(BB.pred_size() != 0 || BB.isLandingPad() || BB.isEntryPoint()); } } + if (FixBranches && NumDoubleJumps) { + Function.eraseInvalidBBs(); + Function.fixBranches(); + } + return NumDoubleJumps; } } // namespace @@ -960,7 +953,7 @@ } if (NumLocalCTCs > 0) { - NumDoubleJumps += fixDoubleJumps(BF, true); + NumDoubleJumps += fixDoubleJumps(BF, false); // Clean-up unreachable tail-call blocks. const std::pair Stats = BF.eraseInvalidBBs(); DeletedBlocks += Stats.first; @@ -1065,57 +1058,27 @@ } } -void Peepholes::removeUselessCondBranches(BinaryFunction &Function) { - for (BinaryBasicBlock &BB : Function) { - if (BB.succ_size() != 2) - continue; - - BinaryBasicBlock *CondBB = BB.getConditionalSuccessor(true); - BinaryBasicBlock *UncondBB = BB.getConditionalSuccessor(false); - if (CondBB != UncondBB) - continue; - - const MCSymbol *TBB = nullptr; - const MCSymbol *FBB = nullptr; - MCInst *CondBranch = nullptr; - MCInst *UncondBranch = nullptr; - bool Result = BB.analyzeBranch(TBB, FBB, CondBranch, UncondBranch); - - // analyzeBranch() can fail due to unusual branch instructions, - // e.g. jrcxz, or jump tables (indirect jump). - if (!Result || !CondBranch) - continue; - - BB.removeDuplicateConditionalSuccessor(CondBranch); - ++NumUselessCondBranches; - } -} - void Peepholes::runOnFunctions(BinaryContext &BC) { - const char Opts = std::accumulate( - opts::Peepholes.begin(), opts::Peepholes.end(), 0, - [](const char A, const opts::PeepholeOpts B) { return A | B; }); - if (Opts == opts::PEEP_NONE || !BC.isX86()) + const char Opts = + std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0, + [](const char A, const PeepholeOpts B) { return A | B; }); + if (Opts == PEEP_NONE || !BC.isX86()) return; for (auto &It : BC.getBinaryFunctions()) { BinaryFunction &Function = It.second; if (shouldOptimize(Function)) { - if (Opts & opts::PEEP_DOUBLE_JUMPS) - NumDoubleJumps += fixDoubleJumps(Function, false); - if (Opts & opts::PEEP_TAILCALL_TRAPS) + if (Opts & PEEP_DOUBLE_JUMPS) + NumDoubleJumps += fixDoubleJumps(Function, true); + if (Opts & PEEP_TAILCALL_TRAPS) addTailcallTraps(Function); - if (Opts & opts::PEEP_USELESS_BRANCHES) - removeUselessCondBranches(Function); assert(Function.validateCFG()); } } outs() << "BOLT-INFO: Peephole: " << NumDoubleJumps << " double jumps patched.\n" << "BOLT-INFO: Peephole: " << TailCallTraps - << " tail call traps inserted.\n" - << "BOLT-INFO: Peephole: " << NumUselessCondBranches - << " useless conditional branches removed.\n"; + << " tail call traps inserted.\n"; } bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) { diff --git a/bolt/test/AArch64/useless-cond-branch.s b/bolt/test/AArch64/useless-cond-branch.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/useless-cond-branch.s @@ -0,0 +1,25 @@ +## This test checks that the useless conditional branch is removed by bolt + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt -print-normalized -print-only=main | FileCheck %s + +# CHECK: cmp x0, #0x1 +# CHECK-NEXT: b {{.*}} + + .text + .align 4 + .global main + .type main, %function +main: + cmp x0, #2 + b.eq 1f + cmp x0, #1 + b.eq 2f + b 2f +1: + add x0, x0, #1 +2: + ret + .size main, .-main