Index: lib/MC/MCCodePadder.cpp
===================================================================
--- lib/MC/MCCodePadder.cpp
+++ lib/MC/MCCodePadder.cpp
@@ -39,10 +39,12 @@
   ArePoliciesActive = usePoliciesForBasicBlock(Context);
 
   bool InsertionPoint = basicBlockRequiresInsertionPoint(Context);
-  assert((!InsertionPoint ||
-          OS->getCurrentFragment()->getKind() != MCFragment::FT_Align) &&
-         "Cannot insert padding nops right after an alignment fragment as it "
-         "will ruin the alignment");
+  bool BasicBlockHasAlignment =
+      OS->getCurrentFragment() == nullptr ||
+      OS->getCurrentFragment()->getKind() == MCFragment::FT_Align;
+  assert((!InsertionPoint || !BasicBlockHasAlignment) &&
+         "Cannot insert padding nops right after a basic block that has "
+         "alignment");
 
   uint64_t PoliciesMask = MCPaddingFragment::PFK_None;
   if (ArePoliciesActive) {
Index: lib/Target/X86/MCTargetDesc/CMakeLists.txt
===================================================================
--- lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -3,6 +3,7 @@
   X86MCTargetDesc.cpp
   X86MCAsmInfo.cpp
   X86MCCodeEmitter.cpp
+  X86MCCodePadder.cpp
   X86MachObjectWriter.cpp
   X86ELFObjectWriter.cpp
   X86WinCOFFObjectWriter.cpp
Index: lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCCodePadder.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -75,7 +76,7 @@
   const uint64_t MaxNopLength;
 public:
   X86AsmBackend(const Target &T, StringRef CPU)
-      : MCAsmBackend(), CPU(CPU),
+      : MCAsmBackend(llvm::make_unique<X86::X86MCCodePadder>(T, CPU)), CPU(CPU),
         MaxNopLength((CPU == "slm" || CPU == "silvermont") ? 7 : 15) {
     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
Index: lib/Target/X86/MCTargetDesc/X86MCCodePadder.h
===================================================================
--- /dev/null
+++ lib/Target/X86/MCTargetDesc/X86MCCodePadder.h
@@ -0,0 +1,120 @@
+//===-- X86MCCodePadder.h - X86 Specific Code Padding Handling --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCCodePadder.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include <memory>
+
+namespace llvm {
+
+class MCAsmLayout;
+class MCPaddingFragment;
+class Target;
+
+namespace X86 {
+
+/// The X86-specific class in charge of all code padding decisions for the X86
+/// target.
+class X86MCCodePadder : public MCCodePadder {
+  X86MCCodePadder() = delete;
+  X86MCCodePadder(const X86MCCodePadder &) = delete;
+  void operator=(const X86MCCodePadder &) = delete;
+
+protected:
+  bool basicBlockRequiresInsertionPoint(
+      const MCCodePaddingContext &Context) override;
+
+  bool usePoliciesForBasicBlock(const MCCodePaddingContext &Context) override;
+
+public:
+  X86MCCodePadder(const Target &T, StringRef CPU);
+  virtual ~X86MCCodePadder() {}
+};
+
+/// A padding policy that handles branch instructions (all types of jmps and
+/// calls) and the first instruction after a branch (i.e. first instruction in a
+/// basic block reachable by branch).
+/// This policy tries to enforce that:
+/// 1. Branch instructions and first instructions in basic blocks won't cross a
+///    16B aligned window.
+/// 2. Branch instructions will end at a 0mod16 address.
+///
+/// Note that this is also the order of importance implemented in the policy.
+///
+/// This policy essentially implements part of rule 12 of section 3.4.1.5 ("Code
+/// alignment") of Intel's Architectures Optimization Reference Manual:
+/// "When executing code from the legacy decode pipeline, direct branches that
+/// are mostly taken should have all their instruction bytes in a 16B aligned
+/// chunk of memory and nearer the end of that 16B aligned chunk."
+class BranchInstructionAndTargetAlignmentPolicy : public MCCodePaddingPolicy {
+  BranchInstructionAndTargetAlignmentPolicy() = delete;
+  BranchInstructionAndTargetAlignmentPolicy(
+      const BranchInstructionAndTargetAlignmentPolicy &) = delete;
+  void operator=(const BranchInstructionAndTargetAlignmentPolicy &) = delete;
+
+  std::unique_ptr<MCInstrInfo> InstrInfo;
+
+protected:
+  /// Computes the penalty weight caused by having branch instruction or the
+  /// instruction after a branch (i.e. first instruction in a basic block
+  /// reachable by branch) being split over more than one instruction window,
+  /// and a branch instruction not being adjacent to the end of its 16B code
+  /// chunk.
+  ///
+  /// \param Window The instruction window.
+  /// \param Offset The offset of the parent section.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the penalty weight caused by having branch instruction or
+  /// instruction after a branch being split over more than one instruction
+  /// window, and a branch instruction not being adjacent to the end of its 16B
+  /// code chunk.
+  double computeWindowPenaltyWeight(const MCPFRange &Window, uint64_t Offset,
+                                    MCAsmLayout &Layout) const override;
+
+public:
+  BranchInstructionAndTargetAlignmentPolicy(const Target &T);
+  virtual ~BranchInstructionAndTargetAlignmentPolicy() {}
+
+  /// Determines if a basic block may cause the case of first instruction after
+  /// a branch (i.e. first instruction in a basic block reachable by branch)
+  /// being split over more than one instruction window.
+  ///
+  /// A basic block will be considered hazardous by this policy if it is
+  /// reachable by a branch (and not only via fallthrough).
+  ///
+  /// \param Context the context of the padding; embeds the basic block's
+  /// parameters.
+  ///
+  /// \returns true iff \p Context indicates that the basic block is reachable
+  /// via branch.
+  bool basicBlockRequiresPaddingFragment(
+      const MCCodePaddingContext &Context) const override;
+
+  /// Determines if an instruction may cause the case of a branch instruction
+  /// being split over more than one instruction window or a branch not
+  /// being adjacent to the end of its 16B code chunk.
+  ///
+  /// An instruction will be considered hazardous by this policy if it is
+  /// a branch (all types of jmps and calls).
+  ///
+  /// \param Inst Instruction to examine.
+  ///
+  /// \returns true iff \p Inst is a branch (all types of jmps and calls).
+  bool instructionRequiresPaddingFragment(const MCInst &Inst) const override;
+};
+
+} // namespace X86
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCCODEPADDER_H
Index: lib/Target/X86/MCTargetDesc/X86MCCodePadder.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/MCTargetDesc/X86MCCodePadder.cpp
@@ -0,0 +1,108 @@
+///=== X86MCCodePadder.cpp - X86 Specific Code Padding Handling -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCCodePadder.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+namespace X86 {
+
+enum PerfNopFragmentKind {
+  BranchInstructionAndTargetAlignment =
+      MCPaddingFragment::FirstTargetPerfNopFragmentKind
+};
+
+//---------------------------------------------------------------------------
+// X86MCCodePadder
+//
+
+X86MCCodePadder::X86MCCodePadder(const Target &T, StringRef CPU) {
+
+  if (CPU != "sandybridge" && CPU != "corei7-avx" && CPU != "ivybridge" &&
+      CPU != "core-avx-i" && CPU != "haswell" && CPU != "core-avx2" &&
+      CPU != "broadwell" && CPU != "skylake" && CPU != "skylake-avx512")
+    return;
+
+  addPolicy(new BranchInstructionAndTargetAlignmentPolicy(T));
+}
+
+bool X86MCCodePadder::basicBlockRequiresInsertionPoint(
+    const MCCodePaddingContext &Context) {
+  // Insertion points are places where any padding that gets inserted will
+  // never be executed (unreachable code).
+  bool BasicBlockHasAlignment =
+      OS->getCurrentFragment() == nullptr ||
+      OS->getCurrentFragment()->getKind() == MCFragment::FT_Align;
+  return MCCodePadder::basicBlockRequiresInsertionPoint(Context) ||
+         (!Context.IsBasicBlockReachableViaFallthrough &&
+          !BasicBlockHasAlignment);
+}
+
+bool X86MCCodePadder::usePoliciesForBasicBlock(
+    const MCCodePaddingContext &Context) {
+  return MCCodePadder::usePoliciesForBasicBlock(Context) &&
+         Context.IsBasicBlockInsideInnermostLoop;
+}
+
+//---------------------------------------------------------------------------
+// BranchInstructionAndTargetAlignmentPolicy
+//
+
+BranchInstructionAndTargetAlignmentPolicy::
+    BranchInstructionAndTargetAlignmentPolicy(const Target &T)
+    : MCCodePaddingPolicy(BranchInstructionAndTargetAlignment, UINT64_C(16),
+                          false),
+      InstrInfo(T.createMCInstrInfo()) {}
+
+bool BranchInstructionAndTargetAlignmentPolicy::
+    basicBlockRequiresPaddingFragment(
+        const MCCodePaddingContext &Context) const {
+  return Context.IsBasicBlockReachableViaBranch;
+}
+
+bool BranchInstructionAndTargetAlignmentPolicy::
+    instructionRequiresPaddingFragment(const MCInst &Inst) const {
+  const MCInstrDesc &InstDesc = InstrInfo->get(Inst.getOpcode());
+  return !InstDesc.isPseudo() && !InstDesc.isReturn() &&
+         (InstDesc.isCall() || InstDesc.isBranch());
+}
+
+double BranchInstructionAndTargetAlignmentPolicy::computeWindowPenaltyWeight(
+    const MCPFRange &Window, uint64_t Offset, MCAsmLayout &Layout) const {
+
+  static const double SPLIT_INST_WEIGHT = 10.0;
+  static const double BRANCH_NOT_AT_CHUNK_END_WEIGHT = 1.0;
+
+  double Weight = 0.0;
+  for (const MCPaddingFragment *Fragment : Window) {
+    if (!Fragment->isInstructionInitialized())
+      continue;
+    uint64_t InstructionStartAddress = getNextFragmentOffset(Fragment, Layout);
+    uint64_t InstructionSecondByteAddress =
+        InstructionStartAddress + UINT64_C(1);
+    uint64_t InstructionEndAddress =
+        InstructionStartAddress + Fragment->getInstSize();
+    // Checking if the instruction pointed by the fragment splits over more than
+    // one window.
+    if (alignTo(InstructionSecondByteAddress, WindowSize) !=
+        alignTo(InstructionEndAddress, WindowSize))
+      Weight += SPLIT_INST_WEIGHT;
+    if (instructionRequiresPaddingFragment(Fragment->getInst()) &&
+        (InstructionEndAddress & UINT64_C(0xF)) != UINT64_C(0))
+      Weight += BRANCH_NOT_AT_CHUNK_END_WEIGHT;
+  }
+  return Weight;
+}
+
+} // namespace X86
+} // namespace llvm
Index: test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
===================================================================
--- test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
+++ test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
@@ -1,5 +1,10 @@
 # RUN: llc -mcpu=haswell -filetype=obj -start-before stack-protector -O2 %s -o - | llvm-objdump -d - | FileCheck %s
 
+# The test: Branch instructions should be pushed to the end of their 16B code
+#           chunks.
+#           Expect insertion of nops in unreachable code space to make this
+#           happen. If this situation happens "naturally", expect no nops in
+#           the appropriate unreachable code space.
 # Test 1:
 #
 # Source C code:
@@ -27,12 +32,21 @@
 # return result;
 # }
 #
-# CHECK: 49: eb 4a jmp 74
-# CHECK: 57: eb 3c jmp 60
-# CHECK: 65: eb 2e jmp 46
-# CHECK: 73: eb 20 jmp 32
-# CHECK: 81: eb 12 jmp 18
-# CHECK: 93: 7f 8b jg -117
+# CHECK: 49: eb 65 jmp 101
+# CHECK-NEXT: 4b: 0f 1f 80 00 00 00 00 nopl (%rax)
+# CHECK: 5e: eb 50 jmp 80
+# CHECK-NEXT: 60: 66 90 nop
+# CHECK: 6c: 7f b2 jg -78
+# CHECK-NEXT: 6e: eb 40 jmp 64
+# CHECK-NEXT: 70: 66 90 nop
+# CHECK: 7c: 7f a2 jg -94
+# CHECK-NEXT: 7e: eb 30 jmp 48
+# CHECK-NEXT: 80: 66 90 nop
+# CHECK: 8e: eb 20 jmp 32
+# CHECK-NEXT: 90: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+# CHECK: aa: 0f 8f 70 ff ff ff jg -144
+# CHECK-NEXT: b0: {{.*}}
+
 
 # Test 2:
 #
@@ -57,11 +71,17 @@
 # return w;
 # }
 #
-# CHECK: 129: eb 13 jmp 19
-# CHECK: 12e: eb a0 jmp -96
-# CHECK: 132: eb 9c jmp -100
-# CHECK: 137: eb 97 jmp -105
-# CHECK: 13c: eb 92 jmp -110
+# CHECK: 139: eb 35 jmp 53
+# CHECK-NOT: {{.*}}nop{{.*}}
+# CHECK: 13e: eb a0 jmp -96
+# CHECK-NEXT: 140: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+# CHECK: 14e: eb 90 jmp -112
+# CHECK-NEXT: 150: 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+# CHECK: 15e: eb 80 jmp -128
+# CHECK-NEXT: 160: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax)
+# CHECK: 16b: e9 70 ff ff ff jmp -144
+# CHECK-NEXT: 170: {{.*}}
+
 --- |
   ; ModuleID = 'D:\iusers\opaparo\dev_test\branch_instruction_and_target_split_perf_nops.ll'
   source_filename = "D:\5C\5Ciusers\5C\5Copaparo\5C\5Cdev_test\5C\5Cbranch_instruction_and_target_split_perf_nops.c"