diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -530,10 +530,10 @@ // If the symbol name is empty use the section name. if ((*SymOrErr)->getType() == ELF::STT_SECTION) { - if (Expected SecOrErr = getSymbolSection(Sym)) { - consumeError(Name.takeError()); + if (Expected SecOrErr = getSymbolSection(Sym)) return (*SecOrErr)->getName(); - } + else + return SecOrErr.takeError(); } return Name; } diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -72,6 +72,7 @@ llvm-c-test llvm-cat llvm-cfi-verify + llvm-cm llvm-config llvm-cov llvm-cvtres diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -180,6 +180,7 @@ "llvm-addr2line", "llvm-bcanalyzer", "llvm-bitcode-strip", + "llvm-cm", "llvm-config", "llvm-cov", "llvm-cxxdump", diff --git a/llvm/test/tools/llvm-cm/X86/bad_triple.s b/llvm/test/tools/llvm-cm/X86/bad_triple.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/bad_triple.s @@ -0,0 +1,5 @@ +## Check that llvm-cm fails with an error when given an invalid triple. +# RUN: llvm-mc -o %t.o --filetype=obj -triple=x86_64-unknown-linux-gnu %s +# RUN: not llvm-cm -triple=not_real_triple %t.o 2>&1 | FileCheck %s + +# CHECK: llvm-cm: error: No available targets are compatible with triple "not_real_triple" diff --git a/llvm/test/tools/llvm-cm/X86/bb-addr-map.test b/llvm/test/tools/llvm-cm/X86/bb-addr-map.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/bb-addr-map.test @@ -0,0 +1,20 @@ +## This test checks that llvm-cm outputs an error when +## failing to read a valid basic block address mapping. 
+# RUN: yaml2obj %s -o %t.o +# RUN: not llvm-cm %t.o 2>&1 | FileCheck %s + +# CHECK: failed to read basic block address mapping + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + - Name: .llvm_cm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text diff --git a/llvm/test/tools/llvm-cm/X86/empty.s b/llvm/test/tools/llvm-cm/X86/empty.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/empty.s @@ -0,0 +1,5 @@ +## Check that llvm-cm does not produce any output on an empty input file. +# RUN: llvm-mc -o %t.o --filetype=obj -triple=x86_64-unknown-linux-gnu %s +# RUN: llvm-cm %t.o 2>&1 | count 0 + +main: diff --git a/llvm/test/tools/llvm-cm/X86/inst_count.s b/llvm/test/tools/llvm-cm/X86/inst_count.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/inst_count.s @@ -0,0 +1,111 @@ +## LLVM-CM instruction counting functionality test. 
+# RUN: llvm-mc -o %t.o --filetype=obj -triple=x86_64-unknown-linux-gnu %s +# RUN: llvm-cm %t.o 2>&1 | FileCheck %s + +# CHECK: +# CHECK: total # of instructions: 3 +# CHECK-NEXT: multiply: +# CHECK-NEXT: +# CHECK-NEXT: total # of instructions: 4 +# CHECK-NEXT: abs_val: +# CHECK-NEXT: +# CHECK-NEXT: Number of instructions in BB: 3 +# CHECK-NEXT: +# CHECK-NEXT: Number of instructions in BB: 1 +# CHECK-NEXT: +# CHECK-NEXT: Number of instructions in BB: 2 +# CHECK-NEXT: total # of instructions: 6 + + .text + .file "inst_count.ll" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .cfi_startproc +# %bb.0: + # kill: def $edi killed $edi def $rdi + leal 1(%rdi), %eax + retq +.LBB_END0_0: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin0 # function address + .byte 1 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin0-.Lfunc_begin0 + .uleb128 .LBB_END0_0-.Lfunc_begin0 + .byte 1 + .text + # -- End function + .globl multiply # -- Begin function multiply + .p2align 4, 0x90 + .type multiply,@function +multiply: # @multiply +.Lfunc_begin1: + .cfi_startproc +# %bb.0: + movl %edi, %eax + imull %esi, %eax + retq +.LBB_END1_0: +.Lfunc_end1: + .size multiply, .Lfunc_end1-multiply + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin1 # function address + .byte 1 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin1-.Lfunc_begin1 + .uleb128 .LBB_END1_0-.Lfunc_begin1 + .byte 1 + .text + # -- End function + .globl abs_val # -- Begin function abs_val + .p2align 4, 0x90 + .type abs_val,@function +abs_val: # @abs_val +.Lfunc_begin2: + .cfi_startproc +# %bb.0: + movl %edi, %eax + testl %edi, %edi + jle .LBB2_2 +.LBB_END2_0: +.LBB2_1: # %if.then + retq +.LBB_END2_1: +.LBB2_2: # %if.else + negl 
%eax + retq +.LBB_END2_2: +.Lfunc_end2: + .size abs_val, .Lfunc_end2-abs_val + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin2 # function address + .byte 3 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin2-.Lfunc_begin2 + .uleb128 .LBB_END2_0-.Lfunc_begin2 + .byte 8 + .byte 1 # BB id + .uleb128 .LBB2_1-.LBB_END2_0 + .uleb128 .LBB_END2_1-.LBB2_1 + .byte 1 + .byte 2 # BB id + .uleb128 .LBB2_2-.LBB_END2_1 + .uleb128 .LBB_END2_2-.LBB2_2 + .byte 1 + .text + # -- End function + .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/tools/llvm-cm/X86/lit.local.cfg b/llvm/test/tools/llvm-cm/X86/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not "X86" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-cm/X86/malformed.s b/llvm/test/tools/llvm-cm/X86/malformed.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/malformed.s @@ -0,0 +1,4 @@ +## Check that llvm-cm returns an error when run on a non-object file. +# RUN: not llvm-cm %s 2>&1 | FileCheck %s + +# CHECK: error: reading file: The file was not recognized as a valid object file diff --git a/llvm/test/tools/llvm-cm/X86/multi-func.s b/llvm/test/tools/llvm-cm/X86/multi-func.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/multi-func.s @@ -0,0 +1,309 @@ +## Check that llvm-cm can handle input containing many basic blocks across functions. 
+# RUN: llvm-mc -o %t.o --filetype=obj -triple=x86_64-unknown-linux-gnu %s +# RUN: llvm-cm %t.o 2>&1 | FileCheck %s + +# CHECK: main: +# CHECK-NEXT: : 0000000000000000 +# CHECK-NEXT: Number of instructions in BB: 2 +# CHECK-NEXT: : 0000000000000005 +# CHECK-NEXT: Number of instructions in BB: 2 +# CHECK-NEXT: : 000000000000000b +# CHECK-NEXT: Number of instructions in BB: 8 +# CHECK-NEXT: total # of instructions: 12 +# CHECK-NEXT: bubbleSort: +# CHECK-NEXT: : 0000000000000020 +# CHECK-NEXT: Number of instructions in BB: 5 +# CHECK-NEXT: : 000000000000002a +# CHECK-NEXT: Number of instructions in BB: 4 +# CHECK-NEXT: : 0000000000000030 +# CHECK-NEXT: Number of instructions in BB: 1 +# CHECK-NEXT: : 0000000000000032 +# CHECK-NEXT: Number of instructions in BB: 10 +# CHECK-NEXT: : 0000000000000060 +# CHECK-NEXT: Number of instructions in BB: 1 +# CHECK-NEXT: : 0000000000000062 +# CHECK-NEXT: Number of instructions in BB: 2 +# CHECK-NEXT: : 0000000000000066 +# CHECK-NEXT: Number of instructions in BB: 6 +# CHECK-NEXT: : 000000000000007a +# CHECK-NEXT: Number of instructions in BB: 7 +# CHECK-NEXT: total # of instructions: 36 +# CHECK-NEXT: isPrime: +# CHECK-NEXT: : 0000000000000090 +# CHECK-NEXT: Number of instructions in BB: 4 +# CHECK-NEXT: : 0000000000000099 +# CHECK-NEXT: Number of instructions in BB: 5 +# CHECK-NEXT: : 00000000000000b0 +# CHECK-NEXT: Number of instructions in BB: 3 +# CHECK-NEXT: : 00000000000000b6 +# CHECK-NEXT: Number of instructions in BB: 5 +# CHECK-NEXT: : 00000000000000bf +# CHECK-NEXT: Number of instructions in BB: 3 +# CHECK-NEXT: : 00000000000000c5 +# CHECK-NEXT: Number of instructions in BB: 2 +# CHECK-NEXT: : 00000000000000c9 +# CHECK-NEXT: Number of instructions in BB: 1 +# CHECK-NEXT: : 00000000000000ce +# CHECK-NEXT: Number of instructions in BB: 3 +# CHECK-NEXT: total # of instructions: 26 + + .text + .file "multi_funct.ll" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main 
+.Lfunc_begin0: + .cfi_startproc +# %bb.0: + cmpl $1, %edi + jg .LBB0_2 +.LBB_END0_0: +.LBB0_1: # %base_case + movl $1, %eax + retq +.LBB_END0_1: +.LBB0_2: # %recursive_case + pushq %rbx + .cfi_def_cfa_offset 16 + .cfi_offset %rbx, -16 + movl %edi, %ebx + leal -1(%rbx), %edi + callq main@PLT + imull %ebx, %eax + popq %rbx + .cfi_def_cfa_offset 8 + retq +.LBB_END0_2: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin0 # function address + .byte 3 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin0-.Lfunc_begin0 + .uleb128 .LBB_END0_0-.Lfunc_begin0 + .byte 8 + .byte 1 # BB id + .uleb128 .LBB0_1-.LBB_END0_0 + .uleb128 .LBB_END0_1-.LBB0_1 + .byte 1 + .byte 2 # BB id + .uleb128 .LBB0_2-.LBB_END0_1 + .uleb128 .LBB_END0_2-.LBB0_2 + .byte 1 + .text + # -- End function + .globl bubbleSort # -- Begin function bubbleSort + .p2align 4, 0x90 + .type bubbleSort,@function +bubbleSort: # @bubbleSort +.Lfunc_begin1: + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + decl %esi + testl %esi, %esi + jg .LBB1_3 +.LBB_END1_0: +.LBB1_1: # %exit + movq %rbp, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END1_1: + .p2align 4, 0x90 +.LBB1_2: # %innerLoopExit + # in Loop: Header=BB1_3 Depth=1 + .cfi_def_cfa %rbp, 16 + incl (%rax) +.LBB_END1_2: +.LBB1_3: # %outerLoop + # =>This Loop Header: Depth=1 + # Child Loop BB1_5 Depth 2 + movq %rsp, %rdx + leaq -16(%rdx), %rax + movq %rax, %rsp + movq %rsp, %r8 + leaq -16(%r8), %rcx + movq %rcx, %rsp + movl $0, -16(%rdx) + movl $0, -16(%r8) + jmp .LBB1_5 +.LBB_END1_3: + .p2align 4, 0x90 +.LBB1_4: # %noSwap + # in Loop: Header=BB1_5 Depth=2 + incl (%rcx) +.LBB_END1_4: +.LBB1_5: # %innerLoopCond + # Parent Loop BB1_3 Depth=1 + # => This Inner Loop Header: Depth=2 + cmpl %esi, (%rcx) + jge .LBB1_2 +.LBB_END1_5: +.LBB1_6: # 
%innerLoopBody + # in Loop: Header=BB1_5 Depth=2 + movslq (%rax), %rdx + leal 1(%rdx), %r8d + movslq %r8d, %r9 + movl (%rdi,%r9,4), %r8d + cmpl %r8d, (%rdi,%rdx,4) + jle .LBB1_4 +.LBB_END1_6: +.LBB1_7: # %swapElements + # in Loop: Header=BB1_5 Depth=2 + leaq (%rdi,%rdx,4), %rdx + leaq (%rdi,%r9,4), %r9 + movl (%rdx), %r10d + movl %r8d, (%rdx) + movl %r10d, (%r9) + jmp .LBB1_4 +.LBB_END1_7: +.Lfunc_end1: + .size bubbleSort, .Lfunc_end1-bubbleSort + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin1 # function address + .byte 8 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin1-.Lfunc_begin1 + .uleb128 .LBB_END1_0-.Lfunc_begin1 + .byte 8 + .byte 8 # BB id + .uleb128 .LBB1_1-.LBB_END1_0 + .uleb128 .LBB_END1_1-.LBB1_1 + .byte 1 + .byte 7 # BB id + .uleb128 .LBB1_2-.LBB_END1_1 + .uleb128 .LBB_END1_2-.LBB1_2 + .byte 8 + .byte 2 # BB id + .uleb128 .LBB1_3-.LBB_END1_2 + .uleb128 .LBB_END1_3-.LBB1_3 + .byte 0 + .byte 6 # BB id + .uleb128 .LBB1_4-.LBB_END1_3 + .uleb128 .LBB_END1_4-.LBB1_4 + .byte 8 + .byte 3 # BB id + .uleb128 .LBB1_5-.LBB_END1_4 + .uleb128 .LBB_END1_5-.LBB1_5 + .byte 8 + .byte 4 # BB id + .uleb128 .LBB1_6-.LBB_END1_5 + .uleb128 .LBB_END1_6-.LBB1_6 + .byte 8 + .byte 5 # BB id + .uleb128 .LBB1_7-.LBB_END1_6 + .uleb128 .LBB_END1_7-.LBB1_7 + .byte 0 + .text + # -- End function + .globl isPrime # -- Begin function isPrime + .p2align 4, 0x90 + .type isPrime,@function +isPrime: # @isPrime +.Lfunc_begin2: + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + cmpl $2, %edi + jl .LBB2_5 +.LBB_END2_0: +.LBB2_1: # %check_prime + movq %rsp, %rax + leaq -16(%rax), %rcx + movq %rcx, %rsp + movl $2, -16(%rax) +.LBB_END2_1: + .p2align 4, 0x90 +.LBB2_2: # %loop_start + # =>This Inner Loop Header: Depth=1 + movl (%rcx), %esi + cmpl %edi, %esi + jge .LBB2_6 +.LBB_END2_2: +.LBB2_3: # 
%check_divisibility + # in Loop: Header=BB2_2 Depth=1 + movl %edi, %eax + cltd + idivl %esi + testl %edx, %edx + je .LBB2_5 +.LBB_END2_3: +.LBB2_4: # %increment_counter + # in Loop: Header=BB2_2 Depth=1 + incl %esi + movl %esi, (%rcx) + jmp .LBB2_2 +.LBB_END2_4: +.LBB2_5: # %not_prime + xorl %eax, %eax + jmp .LBB2_7 +.LBB_END2_5: +.LBB2_6: # %exit_loop + movl $1, %eax +.LBB_END2_6: +.LBB2_7: # %not_prime + movq %rbp, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END2_7: +.Lfunc_end2: + .size isPrime, .Lfunc_end2-isPrime + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin2 # function address + .byte 8 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin2-.Lfunc_begin2 + .uleb128 .LBB_END2_0-.Lfunc_begin2 + .byte 8 + .byte 1 # BB id + .uleb128 .LBB2_1-.LBB_END2_0 + .uleb128 .LBB_END2_1-.LBB2_1 + .byte 8 + .byte 2 # BB id + .uleb128 .LBB2_2-.LBB_END2_1 + .uleb128 .LBB_END2_2-.LBB2_2 + .byte 8 + .byte 3 # BB id + .uleb128 .LBB2_3-.LBB_END2_2 + .uleb128 .LBB_END2_3-.LBB2_3 + .byte 8 + .byte 4 # BB id + .uleb128 .LBB2_4-.LBB_END2_3 + .uleb128 .LBB_END2_4-.LBB2_4 + .byte 0 + .byte 5 # BB id + .uleb128 .LBB2_5-.LBB_END2_4 + .uleb128 .LBB_END2_5-.LBB2_5 + .byte 0 + .byte 6 # BB id + .uleb128 .LBB2_6-.LBB_END2_5 + .uleb128 .LBB_END2_6-.LBB2_6 + .byte 8 + .byte 7 # BB id + .uleb128 .LBB2_7-.LBB_END2_6 + .uleb128 .LBB_END2_7-.LBB2_7 + .byte 1 + .text + # -- End function + .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/tools/llvm-cm/X86/sections-no-symbol-name.test b/llvm/test/tools/llvm-cm/X86/sections-no-symbol-name.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/X86/sections-no-symbol-name.test @@ -0,0 +1,21 @@ +## This test checks that llvm-cm outputs an error message +## when attempting to disassemble a symbol with no name, even +## if there is a valid section name. 
+# RUN: yaml2obj %s -o %t.o +# RUN: not llvm-cm %t.o 2>&1 | FileCheck %s + +# CHECK: failed to get symbol name + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS +Symbols: + - Name: "" + Index: 0x43 + Type: STT_SECTION \ No newline at end of file diff --git a/llvm/tools/llvm-cm/CMakeLists.txt b/llvm/tools/llvm-cm/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/CMakeLists.txt @@ -0,0 +1,15 @@ +set (LLVM_LINK_COMPONENTS + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCDisassembler + Object + Option + Support + TargetParser + ) + +add_llvm_tool(llvm-cm + llvm-cm.cpp +) diff --git a/llvm/tools/llvm-cm/llvm-cm.cpp b/llvm/tools/llvm-cm/llvm-cm.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/llvm-cm.cpp @@ -0,0 +1,420 @@ +//===- llvm-cm.cpp - LLVM cost modeling tool +//----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------------===// +// +// llvm-cm is a tool for native cost model evaluation. 
+// +//===--------------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::object; + +// Define the command line options. +static cl::opt InputFilename(cl::Positional, + cl::desc(""), + cl::init("-"), cl::Required); +static cl::opt TripleName("triple", + cl::desc("Target triple name. 
"), + cl::init(LLVM_DEFAULT_TARGET_TRIPLE), + cl::value_desc("triple")); +static cl::opt CPU("mcpu", cl::desc("Target a specific cpu type"), + cl::init("skylake"), cl::value_desc("cpu-name")); + +static void exitIf(bool Cond, Twine Message) { + if (Cond) { + WithColor::error(errs(), "llvm-cm") << Message << "\n"; + std::exit(1); + } +} +struct FilterResult { + // True if the section should not be skipped. + bool Keep = false; + + // True if the index counter should be incremented, even if the section should + // be skipped. For example, sections may be skipped if they are not included + // in the --section flag, but we still want those to count toward the section + // count. + bool IncrementIndex = false; +}; + +static FilterResult +checkSectionFilter(object::SectionRef S, StringSet<> FoundSectionSet, + const std::vector &FilterSections) { + if (FilterSections.empty()) + return {/*Keep=*/true, /*IncrementIndex=*/true}; + + Expected SecNameOrErr = S.getName(); + if (!SecNameOrErr) { + WithColor::warning(errs(), "llvm-cm") + << "Failed to get section name: " << toString(SecNameOrErr.takeError()) + << "\n"; + return {/*Keep=*/false, /*IncrementIndex=*/false}; + } + StringRef SecName = *SecNameOrErr; + + // StringSet does not allow empty key, so avoid adding sections with + // no name (such as the section with index 0) here. + if (!SecName.empty()) + FoundSectionSet.insert(SecName); + + // Only show the section if it's in the FilterSections list, but always + // increment so the indexing is stable. 
+ return {/*Keep=*/is_contained(FilterSections, SecName), + /*IncrementIndex=*/true}; +} + +SectionFilter +gettoolSectionFilter(object::ObjectFile const &O, uint64_t *Idx, + const std::vector &FilterSections) { + StringSet<> FoundSectionSet; + if (Idx) + *Idx = std::numeric_limits::max(); + return llvm::object::SectionFilter( + /*Pred=*/ + [Idx, FoundSectionSet, FilterSections](object::SectionRef S) { + FilterResult Result = + checkSectionFilter(S, FoundSectionSet, FilterSections); + if (Idx != nullptr && Result.IncrementIndex) + *Idx += 1; + return Result.Keep; + }, + /*Obj=*/O); +} + +[[noreturn]] static void error(Error Err) { + logAllUnhandledErrors(std::move(Err), WithColor::error(outs()), + "reading file: "); + outs().flush(); + exit(1); +} + +template T unwrapOrError(Expected EO) { + if (!EO) + error(EO.takeError()); + return std::move(*EO); +} + +// TODO: Share this with llvm-objdump.cpp. +static uint8_t getElfSymbolType(const llvm::object::ObjectFile &Obj, + const llvm::object::SymbolRef &Sym) { + assert(Obj.isELF()); + if (auto *Elf32LEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf64LEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf32BEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf64BEObj = cast(&Obj)) + return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + llvm_unreachable("Unsupported binary format"); +} + +// TODO: Share this with llvm-objdump.cpp. +SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj, + const object::SymbolRef Symbol) { + const uint64_t Addr = unwrapOrError(Symbol.getAddress()); + const StringRef SymName = unwrapOrError(Symbol.getName()); + return SymbolInfoTy(Addr, SymName, + Obj.isELF() ? 
getElfSymbolType(Obj, Symbol) + : static_cast(ELF::STT_NOTYPE)); +} + +void printFunctionNames(ArrayRef &Aliases) { + for (size_t I = 0; I < Aliases.size(); ++I) { + outs() << Aliases[I].Name << ":\n"; + } +} + +// TODO: Share this with llvm-objdump.cpp. +static void collectBBtoAddressLabels( + const DenseMap &AddrToBBAddrMap, + uint64_t SectionAddr, uint64_t Start, uint64_t End, + std::unordered_map> &Labels) { + if (AddrToBBAddrMap.empty()) + return; + Labels.clear(); + uint64_t StartAddress = SectionAddr + Start; + uint64_t EndAddress = SectionAddr + End; + auto Iter = AddrToBBAddrMap.find(StartAddress); + if (Iter == AddrToBBAddrMap.end()) + return; + for (size_t I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) { + uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr; + if (BBAddress >= EndAddress) + continue; + Labels[BBAddress].push_back(("BB" + Twine(I)).str()); + } +} + +void processInsts( + MCDisassembler &DisAsm, uint64_t SectionAddr, ArrayRef &Bytes, + raw_svector_ostream &CommentStream, uint64_t Start, uint64_t End, + uint64_t Index, uint64_t &NumInstructions, uint64_t NumInstsInBB, + const std::unordered_map> &Labels, + bool CheckedBitSize) { + // Count the number of instructions in each basic block. + bool EnteredBb = false; + while (Index < End) { + uint64_t CurrAddr = SectionAddr + Index; + auto FirstIter = Labels.find(SectionAddr + Index); + if (FirstIter != Labels.end()) { + for (StringRef Label : FirstIter->second) { + if (EnteredBb) { + outs() << "Number of instructions in BB: " << NumInstsInBB << "\n"; + NumInstsInBB = 0; + EnteredBb = false; + } + EnteredBb = true; + outs() << "<" << Label << ">: "; + outs() << format(CheckedBitSize ? 
"%016" PRIx64 " " : "%08" PRIx64 " ", + CurrAddr) + << "\n"; + } + } + MCInst Inst; + uint64_t Size = 0; + ArrayRef BytesSlice = Bytes.slice(Index); + exitIf( + !DisAsm.getInstruction(Inst, Size, BytesSlice, CurrAddr, CommentStream), + "disassembler cannot disassemble given data at address 0x" + + Twine::utohexstr(CurrAddr).str()); + ++NumInstructions; + ++NumInstsInBB; + if (Size == 0) { + Size = std::min( + BytesSlice.size(), DisAsm.suggestBytesToSkip(BytesSlice, CurrAddr)); + } + Index += Size; + } + + if (EnteredBb && Labels.size() > 1) { + outs() << "Number of instructions in BB: " << NumInstsInBB << "\n"; + NumInstsInBB = 0; + EnteredBb = false; + } +} + +int main(int argc, char *argv[]) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "llvm cost model tool\n"); + + // Set up the triple and target features. + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllDisassemblers(); + + object::OwningBinary BinaryOrErr = + unwrapOrError(object::createBinary(InputFilename)); + object::Binary &Binary = *BinaryOrErr.getBinary(); + object::ObjectFile *Obj = dyn_cast(&Binary); + exitIf(!Obj, "input file is not an object file"); + // Start setting up the disassembler. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + exitIf(!TheTarget, Error); + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + exitIf(!MRI, "no register info for target " + TripleName); + + MCTargetOptions MCOptions; + + std::unique_ptr AsmInfo( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); + exitIf(!AsmInfo, "no asm info for target " + TripleName); + + Expected FeatureVals = Obj->getFeatures(); + exitIf(!FeatureVals, "no features for target " + TripleName); + std::unique_ptr SubInfo(TheTarget->createMCSubtargetInfo( + TripleName, CPU, FeatureVals->getString())); + exitIf(!SubInfo, "no subtarget info for target " + TripleName + ", CPU " + + CPU.str() + ", features " + FeatureVals->getString()); + + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + exitIf(!MII, "no instruction info for target " + TripleName); + + MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), SubInfo.get()); + + std::unique_ptr MOFI( + TheTarget->createMCObjectFileInfo(Ctx, false)); + Ctx.setObjectFileInfo(MOFI.get()); + + std::unique_ptr DisAsm( + TheTarget->createMCDisassembler(*SubInfo, Ctx)); + exitIf(!DisAsm, "cannot create disassembler for target " + TripleName); + + // Section information should be stored to determine whether + // or not the section is relevant to disassembly. + MapVector AllSymbols; + SectionSymbolsTy UndefinedSymbols; + bool Is64Bits = Obj->getBytesInAddress() > 4; + for (const object::SymbolRef &Symbol : Obj->symbols()) { + Expected NameOrErr = Symbol.getName(); + exitIf(!NameOrErr, "failed to get symbol name"); + + // If the symbol is a section symbol, then ignore it. + if (Obj->isELF() && getElfSymbolType(*Obj, Symbol) == ELF::STT_SECTION) + continue; + + object::section_iterator SectionI = unwrapOrError(Symbol.getSection()); + + // If the section iterator does not point to the end of the section + // list, then the symbol is defined in a section. 
+ if (SectionI != Obj->section_end()) { + AllSymbols[*SectionI].push_back(createSymbolInfo(*Obj, Symbol)); + } else { + UndefinedSymbols.push_back(createSymbolInfo(*Obj, Symbol)); + } + } + + // Sort the symbols. + for (std::pair &SortSymbols : AllSymbols) { + llvm::stable_sort(SortSymbols.second); + } + llvm::stable_sort(UndefinedSymbols); + + DenseMap BBAddrMap; + auto GetBBAddrMapping = [&]() { + BBAddrMap.clear(); + if (const auto *Elf = dyn_cast(Obj)) { + auto BBAddrMappingOrErr = Elf->readBBAddrMap(); + exitIf(!BBAddrMappingOrErr, "failed to read basic block address mapping"); + for (auto &BBAddr : *BBAddrMappingOrErr) { + BBAddrMap.try_emplace(BBAddr.Addr, std::move(BBAddr)); + } + } + }; + + GetBBAddrMapping(); + + std::vector FilterSections; + // Begin iterating over the sections. + for (const object::SectionRef &Section : + gettoolSectionFilter(*Obj, nullptr, FilterSections)) { + if (FilterSections.empty() && (!Section.isText() || Section.isVirtual())) { + continue; + } + const uint64_t SectionAddr = Section.getAddress(); + const uint64_t SectionSize = Section.getSize(); + + if (!SectionSize) { + continue; + } + + // Get all the symbols in the section - these were sorted earlier. + SectionSymbolsTy &SortedSymbols = AllSymbols[Section]; + + ArrayRef Bytes = + arrayRefFromStringRef(unwrapOrError(Section.getContents())); + + SmallString<40> Comments; + raw_svector_ostream CommentStream(Comments); + + // Start retrieving the MCInsts + for (size_t SI = 0, SE = SortedSymbols.size(); SI != SE;) { + // Find all symbols in the same "location" by incrementing over + // SI until the starting address changes. The sorted symbols were sorted + // by address. + const size_t FirstSI = SI; + uint64_t Start = SortedSymbols[SI].Addr; + + // If the current symbol's address is the same as the previous + // symbol's address, then we know that the current symbol is an + // alias, and we skip it. 
+ ArrayRef Aliases; + while (SI != SE && SortedSymbols[SI].Addr == Start) + ++SI; + + // End is the end of the current location, the start of the next symbol. + uint64_t End = + SI < SE ? SortedSymbols[SI].Addr : SectionAddr + SectionSize; + + // The aliases are the symbols that have the same address. + Aliases = ArrayRef(&SortedSymbols[FirstSI], SI - FirstSI); + + uint64_t StartAddr = 0; + // If the symbol range does not overlap with our section, + // move to the next symbol. + if (Start >= End || End <= StartAddr) + continue; + + // Adjust the start and end addresses to be relative to the start of the + // section. + Start -= SectionAddr; + End -= SectionAddr; + + std::unordered_map> BBtoAddressLabels; + collectBBtoAddressLabels(BBAddrMap, SectionAddr, Start, End, + BBtoAddressLabels); + + printFunctionNames(Aliases); + + uint64_t Index = Start; + if (SectionAddr < StartAddr) + Index = std::max(Index, StartAddr - SectionAddr); + + uint64_t NumInstructions = 0; + uint64_t NumInstsInBB = 0; + + processInsts(*DisAsm, SectionAddr, Bytes, CommentStream, Start, End, + Index, NumInstructions, NumInstsInBB, BBtoAddressLabels, + Is64Bits); + + outs() << "total # of instructions: " << NumInstructions << "\n"; + } + } +}