diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -471,6 +471,14 @@
     return 0;
   }
 
+  /// Rewrite \p Inst in place into the target's zero idiom when it is an
+  /// eligible instruction. Return true if \p Inst was rewritten. Targets that
+  /// run the zero idiom pass are expected to override this.
+  virtual bool replaceZeroIdiom(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Create increment contents of target by 1 for Instrumentation
   virtual InstructionListType createInstrIncMemory(const MCSymbol *Target,
                                                    MCContext *Ctx,
diff --git a/bolt/include/bolt/Passes/ZeroIdiom.h b/bolt/include/bolt/Passes/ZeroIdiom.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/ZeroIdiom.h
@@ -0,0 +1,44 @@
+//===- bolt/Passes/ZeroIdiom.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to convert eligible instructions into zero idiom (xor reg, reg).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_ZEROIDIOM_H
+#define BOLT_PASSES_ZEROIDIOM_H
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+class DataflowInfoManager;
+
+/// Pass for converting eligible instructions into zero idiom. Instructions
+/// are left untouched wherever the flags register is live, because the
+/// replacement (xor reg, reg) overwrites the flags.
+class ZeroIdiomPass : public BinaryFunctionPass {
+  /// Convert eligible instructions of \p Function, consulting the liveness
+  /// analysis obtained from \p Info.
+  void runOnFunction(BinaryFunction &Function, DataflowInfoManager &Info);
+
+public:
+  explicit ZeroIdiomPass(const cl::opt<bool> &PrintPass)
+      : BinaryFunctionPass(PrintPass) {}
+
+  const char *getName() const override { return "zero-idiom"; }
+
+  /// Run the conversion over all functions; does nothing on non-X86 targets.
+  void runOnFunctions(BinaryContext &BC) override;
+};
+
+} // end namespace bolt
+} // end namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -47,6 +47,7 @@
   ValidateMemRefs.cpp
   VeneerElimination.cpp
   RetpolineInsertion.cpp
+  ZeroIdiom.cpp
 
   DISABLE_LLVM_LINK_LLVM_DYLIB
 
diff --git a/bolt/lib/Passes/ZeroIdiom.cpp b/bolt/lib/Passes/ZeroIdiom.cpp
new file mode 100644
--- /dev/null
+++ b/bolt/lib/Passes/ZeroIdiom.cpp
@@ -0,0 +1,62 @@
+//===- bolt/Passes/ZeroIdiom.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Zero Idiom conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/ZeroIdiom.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/BinaryFunctionCallGraph.h"
+#include "bolt/Passes/DataflowInfoManager.h"
+
+#define DEBUG_TYPE "zeroidiom"
+
+namespace llvm {
+namespace bolt {
+
+void ZeroIdiomPass::runOnFunction(BinaryFunction &BF,
+                                  DataflowInfoManager &Info) {
+  BinaryContext &BC = BF.getBinaryContext();
+  LivenessAnalysis &LA = Info.getLivenessAnalysis();
+
+  for (BinaryBasicBlock &BB : BF) {
+    for (MCInst &Inst : BB) {
+      // The zero idiom overwrites the flags register, so leave the
+      // instruction untouched wherever flags are reported live.
+      if (LA.isAlive(ProgramPoint(&Inst), BC.MIB->getFlagsReg()))
+        continue;
+      BC.MIB->replaceZeroIdiom(Inst);
+    }
+  }
+}
+
+void ZeroIdiomPass::runOnFunctions(BinaryContext &BC) {
+  // The generic MCPlusBuilder hook is unreachable; only X86 overrides it.
+  if (!BC.isX86())
+    return;
+
+  // The per-function liveness analysis is backed by a register analysis over
+  // all binary functions, which itself is built on top of the call graph.
+  auto CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
+  auto RA =
+      std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
+
+  ParallelUtilities::WorkFuncWithAllocTy WorkFun =
+      [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
+        DataflowInfoManager Info(BF, RA.get(), nullptr, AllocId);
+        runOnFunction(BF, Info);
+      };
+
+  ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
+      BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, nullptr,
+      "ZeroIdiom");
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -35,6 +35,7 @@
 #include "bolt/Passes/ValidateInternalCalls.h"
 #include "bolt/Passes/ValidateMemRefs.h"
 #include "bolt/Passes/VeneerElimination.h"
+#include "bolt/Passes/ZeroIdiom.h"
 #include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Timer.h"
@@ -195,6 +196,11 @@
              cl::desc("print functions after unreachable code elimination"),
              cl::Hidden, cl::cat(BoltOptCategory));
 
+static cl::opt<bool>
+    PrintZeroIdiom("print-zero-idiom",
+                   cl::desc("print functions after zero idiom"), cl::Hidden,
+                   cl::cat(BoltOptCategory));
+
 static cl::opt<bool> RegReAssign(
     "reg-reassign",
     cl::desc(
@@ -402,6 +408,8 @@
   Manager.registerPass(std::make_unique<CMOVConversion>(),
                        opts::CMOVConversionFlag);
 
+  Manager.registerPass(std::make_unique<ZeroIdiomPass>(PrintZeroIdiom));
+
   // This pass syncs local branches with CFG. If any of the following
   // passes breaks the sync - they either need to re-run the pass or
   // fix branches consistency internally.
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2871,6 +2871,42 @@
     }
   }
 
+  /// Rewrite a "move immediate zero into register" instruction into the
+  /// equivalent `xor reg, reg` zero idiom. Return true only if \p Inst was
+  /// actually rewritten. The replacement writes the flags register, so
+  /// callers must only invoke this where flags are not live.
+  bool replaceZeroIdiom(MCInst &Inst) const override {
+    unsigned NewOpcode;
+    switch (Inst.getOpcode()) {
+    default:
+      return false;
+    case X86::MOV32ri:
+      NewOpcode = X86::XOR32rr;
+      break;
+    case X86::MOV64ri:
+    case X86::MOV64ri32:
+      NewOpcode = X86::XOR64rr;
+      break;
+    }
+
+    // Only a literal zero immediate qualifies; non-immediate operands and
+    // non-zero constants are left alone and reported as "not replaced".
+    const MCOperand &ImmOp =
+        Inst.getOperand(MCPlus::getNumPrimeOperands(Inst) - 1);
+    if (!ImmOp.isImm() || ImmOp.getImm() != 0)
+      return false;
+
+    // Replace `movl $0x0, %eax` with `xorl %eax, %eax`. The destination
+    // register is copied before clear() and then added three times.
+    const MCOperand Reg = Inst.getOperand(0);
+    Inst.setOpcode(NewOpcode);
+    Inst.clear();
+    Inst.addOperand(Reg);
+    Inst.addOperand(Reg);
+    Inst.addOperand(Reg);
+    return true;
+  }
+
   MCPhysReg getIntArgRegister(unsigned ArgNo) const override {
     // FIXME: this should depend on the calling convention.
     switch (ArgNo) {
diff --git a/bolt/test/X86/zero-idiom.s b/bolt/test/X86/zero-idiom.s
new file mode 100644
--- /dev/null
+++ b/bolt/test/X86/zero-idiom.s
@@ -0,0 +1,18 @@
+## Verify that `movl $0x0, %edx` is not turned into the zero idiom while the
+## flags written by `cmpb` are still live (they are read by `cmovel`).
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t -funcs=_start
+# RUN: llvm-objdump -d --disassemble-symbols=_start %t | FileCheck %s
+# CHECK-NOT: xorl %edx, %edx
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+  cmpb $0x0, 0x8(%rax)
+  movl $0x0, %edx
+  movq -0x218(%rbp), %r13
+  movq (%rdi), %rax
+  cmovel %edx, %ebx
+  .cfi_endproc
+.size _start, .-_start