diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1282,7 +1282,8 @@ /// Replace instruction with a shorter version that could be relaxed later /// if needed. - virtual bool shortenInstruction(MCInst &Inst) const { + virtual bool shortenInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const { llvm_unreachable("not implemented"); return false; } diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1011,7 +1011,7 @@ if (opts::Verbosity > 2) OriginalInst = Inst; - if (!BC.MIB->shortenInstruction(Inst)) + if (!BC.MIB->shortenInstruction(Inst, *BC.STI)) continue; if (opts::Verbosity > 2) { diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -51,7 +51,9 @@ return false; } - bool shortenInstruction(MCInst &) const override { return false; } + bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override { + return false; + } bool isADRP(const MCInst &Inst) const override { return Inst.getOpcode() == AArch64::ADRP; diff --git a/bolt/lib/Target/X86/CMakeLists.txt b/bolt/lib/Target/X86/CMakeLists.txt --- a/bolt/lib/Target/X86/CMakeLists.txt +++ b/bolt/lib/Target/X86/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS BOLTCore + BOLTUtils MC Object Support diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/DataExtractor.h" 
#include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" @@ -35,6 +36,17 @@ using namespace llvm; using namespace bolt; +namespace opts { + +extern cl::OptionCategory BoltOptCategory; + +static cl::opt<bool> X86StripRedundantAddressSize( + "x86-strip-redundant-address-size", + cl::desc("Remove redundant Address-Size override prefix"), cl::init(true), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); + +} // namespace opts + namespace { unsigned getShortBranchOpcode(unsigned Opcode) { @@ -2031,14 +2043,26 @@ llvm_unreachable("not implemented"); } - bool shortenInstruction(MCInst &Inst) const override { + bool shortenInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override { unsigned OldOpcode = Inst.getOpcode(); unsigned NewOpcode = OldOpcode; - // Check and remove EIZ/RIZ. These cases represent ambiguous cases where SIB - // byte is present, but no index is used and modrm alone shoud have been - // enough. Converting to NoRegister effectively removes the SIB byte. int MemOpNo = getMemoryOperandNo(Inst); + + // Check and remove redundant Address-Size override prefix. + if (opts::X86StripRedundantAddressSize) { + uint64_t TSFlags = Info->get(OldOpcode).TSFlags; + unsigned Flags = Inst.getFlags(); + + if (!X86_MC::needsAddressSizeOverride(Inst, STI, MemOpNo, TSFlags) && + Flags & X86::IP_HAS_AD_SIZE) + Inst.setFlags(Flags ^ X86::IP_HAS_AD_SIZE); + } + + // Check and remove EIZ/RIZ. These cases represent ambiguous cases where + // SIB byte is present, but no index is used and modrm alone should have + // been enough. Converting to NoRegister effectively removes the SIB byte. if (MemOpNo >= 0) { MCOperand &IndexOp = Inst.getOperand(static_cast<unsigned>(MemOpNo) + X86::AddrIndexReg); @@ -3877,7 +3901,7 @@ return BlocksVectorTy(); CompareInst.addOperand(MCOperand::createImm(CaseIdx)); - shortenInstruction(CompareInst); + shortenInstruction(CompareInst, *Ctx->getSubtargetInfo()); // jump to next target compare. 
NextTarget = diff --git a/bolt/test/X86/addr32.s b/bolt/test/X86/addr32.s new file mode 100644 --- /dev/null +++ b/bolt/test/X86/addr32.s @@ -0,0 +1,25 @@ +# Check that we don't accidentally strip addr32 prefix + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: ld.lld %t.o -o %t.exe -nostdlib +# RUN: llvm-objdump -d %t.exe | FileCheck %s +# RUN: llvm-bolt %t.exe -o %t.out -lite=0 -x86-strip-redundant-address-size=false +# RUN: llvm-objdump -d %t.out | FileCheck %s +# CHECK: 67 e8 {{.*}} addr32 callq {{.*}} +# RUN: llvm-bolt %t.exe -o %t.out -lite=0 -x86-strip-redundant-address-size=true +# remove test name from objdump output, to only search for addr32 in disassembly +# RUN: llvm-objdump -d %t.out | grep -v addr32.s | FileCheck %s --check-prefix=CHECK-STRIP +# CHECK-STRIP-NOT: addr32 + +.globl _start +.type _start, @function +_start: +.code64 + addr32 callq foo + ret + .size _start, .-_start + +.globl foo +.type foo, @function +foo: + ud2