Index: docs/CommandGuide/llvm-mca.rst =================================================================== --- docs/CommandGuide/llvm-mca.rst +++ docs/CommandGuide/llvm-mca.rst @@ -174,6 +174,10 @@ Enable the instruction info view. This is enabled by default. +.. option:: -show-encoding + + Enable the printing of instruction encodings within the instruction info view. + .. option:: -all-stats Print all hardware statistics. This enables extra statistics related to the @@ -415,10 +419,10 @@ resources, and the *Resource pressure view* can help to identify the problematic resource usage. -The second section of the report shows the latency and reciprocal -throughput of every instruction in the sequence. That section also reports -extra information related to the number of micro opcodes, and opcode properties -(i.e., 'MayLoad', 'MayStore', and 'HasSideEffects'). +The second section of the report is the `instruction info view`. It shows the +latency and reciprocal throughput of every instruction in the sequence. It also +reports extra information related to the number of micro opcodes, and opcode +properties (i.e., 'MayLoad', 'MayStore', and 'HasSideEffects'). Field *RThroughput* is the reciprocal of the instruction throughput. Throughput is computed as the maximum number of instructions of a same type that can be @@ -427,6 +431,31 @@ cycles/instruction. That is because the FP multiplier JFPM is only available from pipeline JFPU1. +Instruction encodings are displayed within the instruction info view when flag +`-show-encoding` is specified. + +Below is an example of `-show-encoding` output for the dot-product kernel: + +.. 
code-block:: none + + Instruction Info: + [1]: #uOps + [2]: Latency + [3]: RThroughput + [4]: MayLoad + [5]: MayStore + [6]: HasSideEffects (U) + [7]: Encoding Size + + [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: + 1 2 1.00 4 c5 f0 59 d0 vmulps %xmm0, %xmm1, %xmm2 + 1 4 1.00 4 c5 eb 7c da vhaddps %xmm2, %xmm2, %xmm3 + 1 4 1.00 4 c5 e3 7c e3 vhaddps %xmm3, %xmm3, %xmm4 + +The `Encoding Size` column shows the size in bytes of instructions. The +`Encodings` column shows the actual instruction encodings (byte sequences in +hex). + The third section is the *Resource pressure view*. This view reports the average number of resource cycles consumed every iteration by instructions for every processor resource unit available on the target. Information is Index: include/llvm/MCA/CodeEmitter.h =================================================================== --- include/llvm/MCA/CodeEmitter.h +++ include/llvm/MCA/CodeEmitter.h @@ -0,0 +1,72 @@ +//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A utility class used to compute instruction encodings. It buffers encodings +/// for later usage. It exposes a simple API to compute and get the encodings as +/// StringRef. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_CODEEMITTER_H
+#define LLVM_MCA_CODEEMITTER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <string>
+
+namespace llvm {
+namespace mca {
+
+/// Computes, on request, the encodings of the instructions of a code region
+/// and buffers them so that later requests reuse the bytes already emitted.
+class CodeEmitter {
+  const MCSubtargetInfo &STI;
+  const MCAsmBackend &MAB;
+  const MCCodeEmitter &MCE;
+
+  SmallString<256> Code;     // Bytes of every encoding emitted so far.
+  raw_svector_ostream VecOS; // Stream that appends to `Code`.
+  ArrayRef<MCInst> Sequence; // Instructions of the code region.
+
+  // <base, length> pair: offset of an encoding in `Code`, and its byte size.
+  using EncodingInfo = std::pair<unsigned, unsigned>;
+
+  // Per-instruction cache; a zero length marks an entry not computed yet.
+  SmallVector<EncodingInfo, 16> Encodings;
+
+  // Returns the cache entry of instruction MCID, encoding it on first use.
+  EncodingInfo getOrCreateEncodingInfo(unsigned MCID);
+
+public:
+  CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB,
+              const MCCodeEmitter &CE, ArrayRef<MCInst> S)
+      : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S),
+        Encodings(S.size()) {}
+
+  /// Returns the encoding of Sequence[MCID]; later calls may invalidate it.
+  StringRef getEncoding(unsigned MCID) {
+    EncodingInfo EI = getOrCreateEncodingInfo(MCID);
+    // substr clamps to the buffer, so an empty encoding cannot index past it.
+    return Code.str().substr(EI.first, EI.second);
+  }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_CODEEMITTER_H
Index: lib/MCA/CMakeLists.txt
===================================================================
--- lib/MCA/CMakeLists.txt
+++ lib/MCA/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMMCA
+  CodeEmitter.cpp
   Context.cpp
   HWEventListener.cpp
   HardwareUnits/HardwareUnit.cpp
Index: lib/MCA/CodeEmitter.cpp
===================================================================
--- lib/MCA/CodeEmitter.cpp
+++ lib/MCA/CodeEmitter.cpp
@@ -0,0 +1,37 @@
+//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CodeEmitter API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/CodeEmitter.h"
+
+namespace llvm {
+namespace mca {
+
+/// Returns the cache entry of instruction MCID, encoding it on first use.
+CodeEmitter::EncodingInfo CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) {
+  EncodingInfo &EI = Encodings[MCID];
+  if (EI.second) // A non-zero length means the encoding is already cached.
+    return EI;
+
+  SmallVector<llvm::MCFixup, 2> Fixups; // Filled by the emitter; not read here.
+  const MCInst &Inst = Sequence[MCID];
+  MCInst Relaxed(Inst);
+  if (MAB.mayNeedRelaxation(Inst, STI))
+    MAB.relaxInstruction(Inst, STI, Relaxed);
+
+  EI.first = Code.size(); // The new bytes are appended at the end of `Code`.
+  MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI);
+  EI.second = Code.size() - EI.first;
+  return EI;
+}
+
+} // namespace mca
+} // namespace llvm
Index: test/tools/llvm-mca/X86/show-encoding.s
===================================================================
--- test/tools/llvm-mca/X86/show-encoding.s
+++ test/tools/llvm-mca/X86/show-encoding.s
@@ -0,0 +1,77 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info < %s | FileCheck %s --check-prefix=NORMAL
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info -show-encoding=false < %s | FileCheck %s --check-prefix=NORMAL
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false -instruction-info -show-encoding < %s | FileCheck %s --check-prefix=WITHENCODINGS
+
+  movq 0x170(%rbp), %r10
+  lea (%r8,%r8,2), %r9d
+  movsx %r9d, %r9
+  inc %r8d
+  movq 0x178(%rbp), %r11
+  vmovups (%r10,%r9,4), %xmm3
+  vpslldq $0x4, %xmm3, %xmm2
+  vpslldq $0x4, %xmm3, %xmm4
+  vaddps %xmm2, %xmm3, %xmm6
+  vpslldq $0xc, %xmm3, %xmm5
+  vaddps %xmm4, %xmm5, %xmm7
+  vaddps %xmm6, %xmm7, %xmm8
+  vaddps %xmm8, %xmm0, %xmm9
+  vshufps $0xff, %xmm9, %xmm9, %xmm0
+  vmovups %xmm9, (%r11,%r9,4)
+  cmp %r8d, %esi
+  jl -90
+
+# NORMAL: Instruction Info:
+# NORMAL-NEXT: [1]: #uOps
+# NORMAL-NEXT: [2]: Latency
+# NORMAL-NEXT: [3]: RThroughput
+# NORMAL-NEXT:
[4]: MayLoad +# NORMAL-NEXT: [5]: MayStore +# NORMAL-NEXT: [6]: HasSideEffects (U) + +# WITHENCODINGS: Instruction Info: +# WITHENCODINGS-NEXT: [1]: #uOps +# WITHENCODINGS-NEXT: [2]: Latency +# WITHENCODINGS-NEXT: [3]: RThroughput +# WITHENCODINGS-NEXT: [4]: MayLoad +# WITHENCODINGS-NEXT: [5]: MayStore +# WITHENCODINGS-NEXT: [6]: HasSideEffects (U) +# WITHENCODINGS-NEXT: [7]: Encoding Size + +# NORMAL: [1] [2] [3] [4] [5] [6] Instructions: +# NORMAL-NEXT: 1 3 1.00 * movq 368(%rbp), %r10 +# NORMAL-NEXT: 1 2 1.00 leal (%r8,%r8,2), %r9d +# NORMAL-NEXT: 1 1 0.50 movslq %r9d, %r9 +# NORMAL-NEXT: 1 1 0.50 incl %r8d +# NORMAL-NEXT: 1 3 1.00 * movq 376(%rbp), %r11 +# NORMAL-NEXT: 1 5 1.00 * vmovups (%r10,%r9,4), %xmm3 +# NORMAL-NEXT: 1 1 0.50 vpslldq $4, %xmm3, %xmm2 +# NORMAL-NEXT: 1 1 0.50 vpslldq $4, %xmm3, %xmm4 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm2, %xmm3, %xmm6 +# NORMAL-NEXT: 1 1 0.50 vpslldq $12, %xmm3, %xmm5 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm7 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm6, %xmm7, %xmm8 +# NORMAL-NEXT: 1 3 1.00 vaddps %xmm8, %xmm0, %xmm9 +# NORMAL-NEXT: 1 1 0.50 vshufps $255, %xmm9, %xmm9, %xmm0 +# NORMAL-NEXT: 1 1 1.00 * vmovups %xmm9, (%r11,%r9,4) +# NORMAL-NEXT: 1 1 0.50 cmpl %r8d, %esi +# NORMAL-NEXT: 1 1 0.50 jl -90 + +# WITHENCODINGS: [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: +# WITHENCODINGS-NEXT: 1 3 1.00 * 7 4c 8b 95 70 01 00 00 movq 368(%rbp), %r10 +# WITHENCODINGS-NEXT: 1 2 1.00 4 47 8d 0c 40 leal (%r8,%r8,2), %r9d +# WITHENCODINGS-NEXT: 1 1 0.50 3 4d 63 c9 movslq %r9d, %r9 +# WITHENCODINGS-NEXT: 1 1 0.50 3 41 ff c0 incl %r8d +# WITHENCODINGS-NEXT: 1 3 1.00 * 7 4c 8b 9d 78 01 00 00 movq 376(%rbp), %r11 +# WITHENCODINGS-NEXT: 1 5 1.00 * 6 c4 81 78 10 1c 8a vmovups (%r10,%r9,4), %xmm3 +# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 e9 73 fb 04 vpslldq $4, %xmm3, %xmm2 +# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 d9 73 fb 04 vpslldq $4, %xmm3, %xmm4 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 e0 58 f2 vaddps %xmm2, %xmm3, %xmm6 +# 
WITHENCODINGS-NEXT: 1 1 0.50 5 c5 d1 73 fb 0c vpslldq $12, %xmm3, %xmm5 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 d0 58 fc vaddps %xmm4, %xmm5, %xmm7 +# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 40 58 c6 vaddps %xmm6, %xmm7, %xmm8 +# WITHENCODINGS-NEXT: 1 3 1.00 5 c4 41 78 58 c8 vaddps %xmm8, %xmm0, %xmm9 +# WITHENCODINGS-NEXT: 1 1 0.50 6 c4 c1 30 c6 c1 ff vshufps $255, %xmm9, %xmm9, %xmm0 +# WITHENCODINGS-NEXT: 1 1 1.00 * 6 c4 01 78 11 0c 8b vmovups %xmm9, (%r11,%r9,4) +# WITHENCODINGS-NEXT: 1 1 0.50 3 44 39 c6 cmpl %r8d, %esi +# WITHENCODINGS-NEXT: 1 1 0.50 6 0f 8c 00 00 00 00 jl -90 Index: tools/llvm-mca/Views/InstructionInfoView.h =================================================================== --- tools/llvm-mca/Views/InstructionInfoView.h +++ tools/llvm-mca/Views/InstructionInfoView.h @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" @@ -51,14 +52,18 @@ class InstructionInfoView : public View { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; llvm::ArrayRef Source; llvm::MCInstPrinter &MCIP; public: - InstructionInfoView(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - llvm::ArrayRef S, llvm::MCInstPrinter &IP) - : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} void printView(llvm::raw_ostream &OS) const override; }; Index: tools/llvm-mca/Views/InstructionInfoView.cpp =================================================================== --- tools/llvm-mca/Views/InstructionInfoView.cpp +++ tools/llvm-mca/Views/InstructionInfoView.cpp @@ -12,6 +12,7 @@ 
//===----------------------------------------------------------------------===// #include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" namespace llvm { namespace mca { @@ -26,10 +27,17 @@ TempStream << "\n\nInstruction Info:\n"; TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" - << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } - TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; - for (const MCInst &Inst : Source) { + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); // Obtain the scheduling class information from the instruction. @@ -72,7 +80,20 @@ } TempStream << (MCDesc.mayLoad() ? " * " : " "); TempStream << (MCDesc.mayStore() ? " * " : " "); - TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } MCIP.printInst(&Inst, InstrStream, "", STI); InstrStream.flush(); @@ -80,7 +101,7 @@ // Consume any tabs or spaces at the beginning of the string. 
StringRef Str(Instruction); Str = Str.ltrim(); - TempStream << " " << Str << '\n'; + TempStream << Str << '\n'; Instruction = ""; } Index: tools/llvm-mca/llvm-mca.cpp =================================================================== --- tools/llvm-mca/llvm-mca.cpp +++ tools/llvm-mca/llvm-mca.cpp @@ -32,12 +32,15 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/MCA/Context.h" #include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" @@ -200,6 +203,11 @@ cl::desc("Enable bottleneck analysis (disabled by default)"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt ShowEncoding( + "show-encoding", + cl::desc("Print encoding information in the instruction info view"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -424,6 +432,12 @@ // Number each region in the sequence. unsigned RegionIdx = 0; + std::unique_ptr MCE( + TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB(TheTarget->createMCAsmBackend( + *STI, *MRI, InitMCTargetOptionsFromFlags())); + for (const std::unique_ptr &Region : Regions) { // Skip empty code regions. if (Region->empty()) @@ -441,6 +455,7 @@ // Lower the MCInst sequence into an mca::Instruction sequence. ArrayRef Insts = Region->getInstructions(); + mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); std::vector> LoweredSequence; for (const MCInst &MCI : Insts) { Expected> Inst = @@ -478,7 +493,7 @@ // Create the views for this pipeline, execute, and emit a report. 
if (PrintInstructionInfoView) { Printer.addView(llvm::make_unique( - *STI, *MCII, Insts, *IP)); + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); } Printer.addView( llvm::make_unique(*STI, *IP, Insts)); @@ -504,8 +519,8 @@ } if (PrintInstructionInfoView) - Printer.addView( - llvm::make_unique(*STI, *MCII, Insts, *IP)); + Printer.addView(llvm::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); if (PrintDispatchStats) Printer.addView(llvm::make_unique());