diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -295,6 +295,16 @@ /// Perform simple peephole optimizations. class Peepholes : public BinaryFunctionPass { +public: + enum PeepholeOpts : char { + PEEP_NONE = 0x0, + PEEP_DOUBLE_JUMPS = 0x2, + PEEP_TAILCALL_TRAPS = 0x4, + PEEP_USELESS_BRANCHES = 0x8, + PEEP_ALL = 0xf + }; + +private: uint64_t NumDoubleJumps{0}; uint64_t TailCallTraps{0}; uint64_t NumUselessCondBranches{0}; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -105,29 +105,19 @@ cl::Hidden, cl::cat(BoltOptCategory)); -enum PeepholeOpts : char { - PEEP_NONE = 0x0, - PEEP_DOUBLE_JUMPS = 0x2, - PEEP_TAILCALL_TRAPS = 0x4, - PEEP_USELESS_BRANCHES = 0x8, - PEEP_ALL = 0xf -}; - -static cl::list -Peepholes("peepholes", - cl::CommaSeparated, - cl::desc("enable peephole optimizations"), - cl::value_desc("opt1,opt2,opt3,..."), - cl::values( - clEnumValN(PEEP_NONE, "none", "disable peepholes"), - clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps", - "remove double jumps when able"), - clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"), - clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches", - "remove useless conditional branches"), - clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::list Peepholes( + "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"), + cl::value_desc("opt1,opt2,opt3,..."), + cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"), + clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps", + "remove double jumps when able"), + clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps", + "insert tail call traps"), + clEnumValN(Peepholes::PEEP_USELESS_BRANCHES, 
"useless-branches", + "remove useless conditional branches"), + clEnumValN(Peepholes::PEEP_ALL, "all", + "enable all peephole optimizations")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt PrintFuncStat("print-function-statistics", @@ -1092,20 +1082,20 @@ } void Peepholes::runOnFunctions(BinaryContext &BC) { - const char Opts = std::accumulate( - opts::Peepholes.begin(), opts::Peepholes.end(), 0, - [](const char A, const opts::PeepholeOpts B) { return A | B; }); - if (Opts == opts::PEEP_NONE || !BC.isX86()) + const char Opts = + std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0, + [](const char A, const PeepholeOpts B) { return A | B; }); + if (Opts == PEEP_NONE) return; for (auto &It : BC.getBinaryFunctions()) { BinaryFunction &Function = It.second; if (shouldOptimize(Function)) { - if (Opts & opts::PEEP_DOUBLE_JUMPS) + if (Opts & PEEP_DOUBLE_JUMPS) NumDoubleJumps += fixDoubleJumps(Function, false); - if (Opts & opts::PEEP_TAILCALL_TRAPS) + if (Opts & PEEP_TAILCALL_TRAPS) addTailcallTraps(Function); - if (Opts & opts::PEEP_USELESS_BRANCHES) + if (Opts & PEEP_USELESS_BRANCHES) removeUselessCondBranches(Function); assert(Function.validateCFG()); } diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -798,6 +798,13 @@ createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true); } + bool createTrap(MCInst &Inst) const override { + Inst.clear(); + Inst.setOpcode(AArch64::BRK); + Inst.addOperand(MCOperand::createImm(1)); + return true; + } + bool convertJmpToTailCall(MCInst &Inst) override { setTailCall(Inst); return true; diff --git a/bolt/test/AArch64/double_jump.cpp b/bolt/test/AArch64/double_jump.cpp new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/double_jump.cpp @@ -0,0 +1,55 @@ +// A contrived example to test the double jump removal peephole. 
+ +// RUN: %clang %cflags -O0 %s -o %t.exe +// RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=double-jumps | \ +// RUN: FileCheck %s -check-prefix=CHECKBOLT +// RUN: llvm-objdump -d %t.bolt | FileCheck %s + +// CHECKBOLT: BOLT-INFO: Peephole: 1 double jumps patched. + +// CHECK: <_Z3foom>: +// CHECK-NEXT: sub sp, sp, #16 +// CHECK-NEXT: str x0, [sp, #8] +// CHECK-NEXT: ldr [[REG:x[0-9]+]], [sp, #8] +// CHECK-NEXT: cmp [[REG]], #0 +// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x34> +// CHECK-NEXT: add [[REG]], [[REG]], #1 +// CHECK-NEXT: add [[REG]], [[REG]], #1 +// CHECK-NEXT: cmp [[REG]], #2 +// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x28> +// CHECK-NEXT: add [[REG]], [[REG]], #1 +// CHECK-NEXT: mov [[REG]], x1 +// CHECK-NEXT: ldr x1, [sp] +// CHECK-NEXT: b {{.*}} +// CHECK-NEXT: ldr x1, [sp] +// CHECK-NEXT: add [[REG]], [[REG]], #1 +// CHECK-NEXT: b {{.*}} + +extern "C" unsigned long bar(unsigned long count) { return count + 1; } + +unsigned long foo(unsigned long count) { + asm volatile(" cmp %0,#0\n" + " b.eq .L7\n" + " add %0, %0, #1\n" + " b .L1\n" + ".L1: b .L2\n" + ".L2: add %0, %0, #1\n" + " cmp %0, #2\n" + " b.ne .L3\n" + " b .L4\n" + ".L3: b .L5\n" + ".L5: add %0, %0, #1\n" + ".L4: mov %0,x1\n" + " ldr x1, [sp]\n" + " b .L6\n" + ".L7: ldr x1, [sp]\n" + " add %0, %0, #1\n" + " b .L6\n" + ".L6: b bar\n" + : + : "r"(count) + :); + return count; +} + +int main(int argc, const char *argv[]) { return foo(38); } diff --git a/bolt/test/AArch64/tailcall_traps.s b/bolt/test/AArch64/tailcall_traps.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/tailcall_traps.s @@ -0,0 +1,37 @@ +## Tests the peephole that adds trap instructions following indirect tail calls. 
+ +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=tailcall-traps \ +# RUN: -print-peepholes -funcs=foo,bar 2>&1 | FileCheck %s + +# CHECK: Binary Function "foo" +# CHECK: br x0 # TAILCALL +# CHECK-NEXT: brk #0x1 +# CHECK: End of Function "foo" + +# CHECK: Binary Function "bar" +# CHECK: b foo # TAILCALL +# CHECK: End of Function "bar" + + .text + .align 4 + .global main + .type main, %function +main: + nop + ret + .size main, .-main + + .global foo + .type foo, %function +foo: + br x0 + .size foo, .-foo + + .global bar + .type bar, %function +bar: + b foo + .size bar, .-bar diff --git a/bolt/test/X86/Inputs/double_jump.cpp b/bolt/test/X86/Inputs/double_jump.cpp --- a/bolt/test/X86/Inputs/double_jump.cpp +++ b/bolt/test/X86/Inputs/double_jump.cpp @@ -1,6 +1,4 @@ -/* - * A contrived example to test the double jump removal peephole. - */ +// A contrived example to test the double jump removal peephole. extern "C" unsigned long bar(unsigned long count) { return count + 1;