diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -180,6 +180,7 @@ "llvm-addr2line", "llvm-bcanalyzer", "llvm-bitcode-strip", + "llvm-cm", "llvm-config", "llvm-cov", "llvm-cxxdump", diff --git a/llvm/test/tools/llvm-cm/inst_count.s b/llvm/test/tools/llvm-cm/inst_count.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/inst_count.s @@ -0,0 +1,102 @@ +# REQUIRES: x86_64-linux +# RUN: llvm-mc -o %t.o --filetype=obj -triple=x86_64-unknown-linux-gnu %s +# RUN: llvm-cm %t.o 2>&1 | FileCheck %s + + .text + .file "inst_count.ll" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .cfi_startproc +# %bb.0: + # kill: def $edi killed $edi def $rdi + leal 1(%rdi), %eax + retq +.LBB_END0_0: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin0 # function address + .byte 1 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin0-.Lfunc_begin0 + .uleb128 .LBB_END0_0-.Lfunc_begin0 + .byte 1 + .text + # -- End function + .globl multiply # -- Begin function multiply + .p2align 4, 0x90 + .type multiply,@function +multiply: # @multiply +.Lfunc_begin1: + .cfi_startproc +# %bb.0: + movl %edi, %eax + imull %esi, %eax + retq +.LBB_END1_0: +.Lfunc_end1: + .size multiply, .Lfunc_end1-multiply + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin1 # function address + .byte 1 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin1-.Lfunc_begin1 + .uleb128 .LBB_END1_0-.Lfunc_begin1 + .byte 1 + .text + # -- End function + .globl abs_val # -- Begin function abs_val + .p2align 4, 0x90 + .type abs_val,@function +abs_val: # @abs_val +.Lfunc_begin2: + .cfi_startproc +# %bb.0: + movl %edi, %eax + testl %edi, %edi + jle .LBB2_2 
+.LBB_END2_0: +.LBB2_1: # %if.then + retq +.LBB_END2_1: +.LBB2_2: # %if.else + negl %eax + retq +.LBB_END2_2: +.Lfunc_end2: + .size abs_val, .Lfunc_end2-abs_val + .cfi_endproc + .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text + .byte 2 # version + .byte 0 # feature + .quad .Lfunc_begin2 # function address + .byte 3 # number of basic blocks + .byte 0 # BB id + .uleb128 .Lfunc_begin2-.Lfunc_begin2 + .uleb128 .LBB_END2_0-.Lfunc_begin2 + .byte 8 + .byte 1 # BB id + .uleb128 .LBB2_1-.LBB_END2_0 + .uleb128 .LBB_END2_1-.LBB2_1 + .byte 1 + .byte 2 # BB id + .uleb128 .LBB2_2-.LBB_END2_1 + .uleb128 .LBB_END2_2-.LBB2_2 + .byte 1 + .text + # -- End function + .section ".note.GNU-stack","",@progbits + + +# CHECK: # of instructions: + + diff --git a/llvm/tools/llvm-cm/CMakeLists.txt b/llvm/tools/llvm-cm/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/CMakeLists.txt @@ -0,0 +1,20 @@ +#include_directories(include) + +set (LLVM_LINK_COMPONENTS + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCDisassembler + Object + Option + Support + TargetParser + ) + +add_llvm_tool(llvm-cm + llvm-cm.cpp +) + +#set(LLVM_CM_SOURCE_DIR ${CURRENT_SOURCE_DIR}) + diff --git a/llvm/tools/llvm-cm/llvm-cm.cpp b/llvm/tools/llvm-cm/llvm-cm.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/llvm-cm.cpp @@ -0,0 +1,439 @@ +//===- llvm-cm.cpp - LLVM cost modeling tool ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------------===// +// +// llvm-cm is a tool for native cost model evaluation. 
+//
+//===--------------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+// NOTE(review): the nine directives below have no header name in this copy of
+// the patch; presumably angle-bracket system headers were lost when the diff
+// was extracted. Restore them from the original revision before applying.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+// Command line options.
+// NOTE(review): each cl::opt below appears without a template argument and the
+// first cl::desc is empty; this looks like the same extraction damage as the
+// includes above - confirm against the original revision.
+
+// Positional input file. NOTE(review): cl::init("-") supplies a default while
+// cl::Required demands an explicit value, so the default looks unreachable -
+// confirm which of the two is intended.
+static cl::opt InputFilename(cl::Positional, cl::desc(""), cl::init("-"), cl::Required);
+// -triple: target triple, defaulting to LLVM_DEFAULT_TARGET_TRIPLE.
+static cl::opt TripleName("triple",
+                          cl::desc("Target triple name. "
+                                   "See -version for available targets."),
+                          cl::init(LLVM_DEFAULT_TARGET_TRIPLE),
+                          cl::value_desc("triple"));
+// -mcpu: CPU name, defaulting to "skylake".
+static cl::opt CPU("mcpu",
+                   cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+                   cl::init("skylake"),
+                   cl::value_desc("cpu-name"));
+
+// Class for error handling.
+/// Scope guard that reports a fatal error.
+///
+/// When constructed with a true condition, the destructor prints
+/// "Error: <message>" followed by a newline to errs() and calls exit(1).
+/// With a false condition it does nothing.
+class ExitIf {
+public:
+  /// \param Cond when true, the destructor reports \p MSG and exits.
+  /// \param MSG  text printed after the "Error: " prefix.
+  ExitIf(bool Cond, std::string MSG)
+      : Condition(Cond), Message(std::move(MSG)) {}
+
+  // The check runs in the destructor, so an unnamed temporary such as
+  // `ExitIf(Failed, "...");` terminates the process at the end of that
+  // statement.
+  ~ExitIf() {
+    if (Condition) {
+      errs() << "Error: " << Message << "\n";
+      exit(1);
+    }
+  }
+
+private:
+  bool Condition;
+  std::string Message;
+};
+
+/// Outcome of testing one section against the section filter.
+struct FilterResult {
+  // True if the section should not be skipped.
+  bool Keep = false;
+
+  // True if the index counter should be incremented, even if the section should
+  // be skipped. For example, sections may be skipped if they are not included
+  // in the --section flag, but we still want those to count toward the section
+  // count.
+  bool IncrementIndex = false;
+};
+
+/// Decide whether section \p S passes the name filter.
+///
+/// \param S               section under test.
+/// \param FoundSectionSet receives the name of every named section seen.
+///                        Taken by reference: the previous by-value parameter
+///                        silently discarded every insertion.
+/// \param FilterSections  section names to keep; an empty list keeps all
+///                        sections. Taken by const reference so the list is
+///                        not copied on every call.
+/// \returns Keep/IncrementIndex flags; both are false when the section name
+///          cannot be read.
+static FilterResult
+checkSectionFilter(object::SectionRef S, StringSet<> &FoundSectionSet,
+                   const std::vector<std::string> &FilterSections) {
+  if (FilterSections.empty())
+    return {/*Keep=*/true, /*IncrementIndex=*/true};
+
+  Expected<StringRef> SecNameOrErr = S.getName();
+  if (!SecNameOrErr) {
+    // An unreadable name is not fatal here: drop the error and skip the
+    // section without counting it.
+    consumeError(SecNameOrErr.takeError());
+    return {/*Keep=*/false, /*IncrementIndex=*/false};
+  }
+  StringRef SecName = *SecNameOrErr;
+
+  // StringSet does not allow empty key so avoid adding sections with
+  // no name (such as the section with index 0) here.
+  if (!SecName.empty())
+    FoundSectionSet.insert(SecName);
+
+  // Only show the section if it's in the FilterSections list, but always
+  // increment so the indexing is stable.
+  return {/*Keep=*/is_contained(FilterSections, SecName),
+          /*IncrementIndex=*/true};
+}
+
+/// Build a filtered view over the sections of \p O.
+///
+/// \param O              object file whose sections are iterated.
+/// \param Idx            optional running section index. It is set to
+///                       UINT64_MAX here so that the first increment wraps it
+///                       to 0; may be null.
+/// \param FilterSections section names to keep; empty keeps every section.
+/// \returns a SectionFilter whose predicate is checkSectionFilter().
+llvm::object::SectionFilter
+toolSectionFilter(object::ObjectFile const &O, uint64_t *Idx,
+                  std::vector<std::string> FilterSections) {
+  StringSet<> FoundSectionSet;
+  if (Idx)
+    *Idx = UINT64_MAX;
+  return llvm::object::SectionFilter(
+      // `mutable` lets the predicate record section names in its own copy of
+      // FoundSectionSet across calls.
+      /*Pred=*/[Idx, FoundSectionSet,
+                FilterSections](object::SectionRef S) mutable {
+        FilterResult Result =
+            checkSectionFilter(S, FoundSectionSet, FilterSections);
+        if (Idx != nullptr && Result.IncrementIndex)
+          *Idx += 1;
+        return Result.Keep;
+      },
+      /*Obj=*/O);
+}
+
+// Implement the "error" function.
+/// Report \p Err and terminate the process with exit status 1.
+///
+/// The diagnostic goes to errs() (it was previously written to outs(), mixing
+/// it into the tool's regular output). outs() is flushed first so already
+/// produced output is not lost.
+[[noreturn]] static void error(Error Err) {
+  outs().flush();
+  logAllUnhandledErrors(std::move(Err), WithColor::error(errs()),
+                        "reading file: ");
+  exit(1);
+}
+
+/// Return the value held by \p EO, or report its error via error() and exit.
+template <typename T>
+T unwrapOrError(Expected<T> EO) {
+  if (!EO)
+    error(EO.takeError());
+  return std::move(*EO);
+}
+
+// TODO: Share this with llvm-objdump.cpp.
+/// Return the ELF symbol type (the value of getType()) of \p Sym.
+///
+/// \p Obj must be an ELF object file (asserted). Each of the four ELF object
+/// file classes (32/64-bit, little/big endian) is tried in turn; a failure to
+/// read the symbol is fatal through unwrapOrError().
+static uint8_t getElfSymbolType(const llvm::object::ObjectFile &Obj,
+                                const llvm::object::SymbolRef &Sym) {
+  assert(Obj.isELF());
+  if (auto *Elf32LEObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
+    return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  if (auto *Elf64LEObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
+    return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  if (auto *Elf32BEObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
+    return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  // dyn_cast (not cast) so that an unexpected object kind reaches the
+  // llvm_unreachable below instead of making it dead code.
+  if (auto *Elf64BEObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
+    return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  llvm_unreachable("Unsupported binary format");
+}
+
+// TODO: Share this with llvm-objdump.cpp.
+/// Build a SymbolInfoTy from the address and name of \p Symbol.
+///
+/// The type field is the ELF symbol type for ELF objects and ELF::STT_NOTYPE
+/// otherwise. Failure to read the address or the name is fatal.
+SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj,
+                              const object::SymbolRef &Symbol) {
+  const uint64_t Addr = unwrapOrError(Symbol.getAddress());
+  const StringRef SymName = unwrapOrError(Symbol.getName());
+  const uint8_t Type =
+      Obj.isELF() ? getElfSymbolType(Obj, Symbol) : (uint8_t)ELF::STT_NOTYPE;
+  return SymbolInfoTy(Addr, SymName, Type);
+}
+
+/// Print the name of every symbol in \p Aliases to outs(), one "<name>:" line
+/// per symbol.
+void printFunction(ArrayRef<SymbolInfoTy> &Aliases) {
+  for (const SymbolInfoTy &Alias : Aliases)
+    outs() << Alias.Name << ":\n";
+}
+
+// TODO: Share this with llvm-objdump.cpp.
+/// Collect the basic-block labels of the function that starts at
+/// \p SectionAddr + \p Start.
+///
+/// \param AddrToBBAddrMap function start address -> basic block address map.
+/// \param SectionAddr     address of the enclosing section.
+/// \param Start           section-relative start of the symbol range.
+/// \param End             section-relative end of the symbol range.
+/// \param Labels          output: block address -> list of "BB<index>" labels.
+///                        Always cleared first, so it never carries entries
+///                        from an earlier call.
+static void collectBBtoAddressLabels(
+    const std::unordered_map<uint64_t, object::BBAddrMap> &AddrToBBAddrMap,
+    uint64_t SectionAddr, uint64_t Start, uint64_t End,
+    std::unordered_map<uint64_t, std::vector<std::string>> &Labels) {
+  Labels.clear();
+  if (AddrToBBAddrMap.empty())
+    return;
+  uint64_t StartAddress = SectionAddr + Start;
+  uint64_t EndAddress = SectionAddr + End;
+  auto Iter = AddrToBBAddrMap.find(StartAddress);
+  if (Iter == AddrToBBAddrMap.end())
+    return;
+  for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) {
+    uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr;
+    // Blocks at or past the end of the symbol range get no label.
+    if (BBAddress >= EndAddress)
+      continue;
+    Labels[BBAddress].push_back(("BB" + Twine(I)).str());
+  }
+}
+
+/// Disassemble the byte range [\p Index, \p End) of a section and count the
+/// instructions.
+///
+/// \param DisAsm          disassembler used to decode each instruction.
+/// \param SectionAddr     address of the section; added to \p Index to form
+///                        the address handed to the disassembler.
+/// \param Bytes           contents of the section.
+/// \param CommentStream   stream passed to the disassembler for comments.
+/// \param Start           section-relative start of the range (not read here).
+/// \param End             section-relative end of the range.
+/// \param Index           in/out: current section-relative offset; equals or
+///                        exceeds \p End on return.
+/// \param NumInstructions in/out: incremented once per decoded instruction.
+/// \param Labels          address -> labels; every label at the current
+///                        address is printed to outs() before decoding.
+/// \param CheckedBitSize  true prints addresses with 16 hex digits, false
+///                        with 8.
+///
+/// A failing getInstruction() is fatal (reported through ExitIf).
+void processInsts(
+    std::unique_ptr<MCDisassembler> &DisAsm, const uint64_t &SectionAddr,
+    ArrayRef<uint8_t> &Bytes, raw_svector_ostream &CommentStream,
+    uint64_t &Start, uint64_t &End, uint64_t &Index, int &NumInstructions,
+    std::unordered_map<uint64_t, std::vector<std::string>> &Labels,
+    bool CheckedBitSize) {
+  while (Index < End) {
+    uint64_t CurrAddr = SectionAddr + Index;
+    auto FirstIter = Labels.find(CurrAddr);
+    if (FirstIter != Labels.end()) {
+      for (StringRef Label : FirstIter->second) {
+        outs() << "<" << Label << ">: ";
+        outs() << format(CheckedBitSize ? "%016" PRIx64 " " : "%08" PRIx64 " ",
+                         CurrAddr)
+               << "\n";
+      }
+    }
+    MCInst Inst;
+    uint64_t Size = 0;
+    ArrayRef<uint8_t> BytesSlice = Bytes.slice(Index);
+    ExitIf(!DisAsm->getInstruction(Inst, Size, BytesSlice, CurrAddr,
+                                   CommentStream),
+           "getInstruction failed");
+    ++NumInstructions;
+    // A zero size would never advance Index; ask the disassembler how far to
+    // skip, bounded by the bytes that remain.
+    if (Size == 0) {
+      Size = std::min<uint64_t>(
+          BytesSlice.size(), DisAsm->suggestBytesToSkip(BytesSlice, CurrAddr));
+    }
+    Index += Size;
+  }
+}
+
+/// Tool entry point: open the input object file, disassemble every text
+/// section symbol by symbol and print, for each symbol group, the symbol
+/// names, any basic-block labels and the number of decoded instructions.
+int main(int argc, char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  // Parse the command line options.
+  cl::ParseCommandLineOptions(argc, argv, "llvm cost model tool\n");
+
+  // Register the targets needed for disassembly.
+  InitializeAllTargetInfos();
+  InitializeAllTargetMCs();
+  InitializeAllDisassemblers();
+
+  object::OwningBinary<object::Binary> BinaryOrErr =
+      unwrapOrError(object::createBinary(InputFilename));
+
+  object::Binary &Binary = *BinaryOrErr.getBinary();
+
+  // Get the object file from the binary. dyn_cast yields null for any other
+  // kind of binary, so check before the first dereference.
+  object::ObjectFile *Obj = dyn_cast<object::ObjectFile>(&Binary);
+  ExitIf(!Obj, "input is not an object file");
+
+  // Get the Target. lookupTarget returns null and fills Error on failure.
+  // ExitIf already prints an "Error: " prefix and a trailing newline, so the
+  // messages below carry neither.
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  ExitIf(!TheTarget, Error);
+
+  // Extra target attributes; nothing populates this list yet.
+  std::vector<std::string> MAttrs;
+
+  Expected<SubtargetFeatures> Features = Obj->getFeatures();
+  ExitIf(!Features, "failed to read object file features");
+
+  SubtargetFeatures TrueFeatures = *Features;
+  for (const std::string &MAttr : MAttrs)
+    TrueFeatures.AddFeature(MAttr);
+
+  // Start setting up the disassembler.
+  std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+  ExitIf(!MRI, "no register info for target " + TripleName);
+
+  MCTargetOptions MCOptions;
+
+  std::unique_ptr<MCAsmInfo> AsmInfo(
+      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+  ExitIf(!AsmInfo, "no assembly info for target " + TripleName);
+
+  std::unique_ptr<MCSubtargetInfo> SubInfo(TheTarget->createMCSubtargetInfo(
+      TripleName, CPU, TrueFeatures.getString()));
+  ExitIf(!SubInfo, "no subtarget info for target " + TripleName);
+
+  std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+  ExitIf(!MII, "no instruction info for target " + TripleName);
+
+  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), SubInfo.get());
+
+  std::unique_ptr<MCObjectFileInfo> MOFI(
+      TheTarget->createMCObjectFileInfo(Ctx, false));
+  Ctx.setObjectFileInfo(MOFI.get());
+
+  std::unique_ptr<MCDisassembler> DisAsm(
+      TheTarget->createMCDisassembler(*SubInfo, Ctx));
+  ExitIf(!DisAsm, "no disassembler for target " + TripleName);
+
+  std::map<object::SectionRef, SectionSymbolsTy> AllSymbols;
+  SectionSymbolsTy UndefinedSymbols;
+
+  // Section name filter; left empty, so every section is considered.
+  std::vector<std::string> FilterSections;
+
+  bool Check64Bits = Obj->getBytesInAddress() > 4;
+
+  // Group the symbols by the section that defines them.
+  for (const object::SymbolRef &Symbol : Obj->symbols()) {
+    Expected<StringRef> NameOrErr = Symbol.getName();
+    ExitIf(!NameOrErr, "failed to get symbol name");
+
+    // If the symbol is a section symbol, then ignore it.
+    if (Obj->isELF() && getElfSymbolType(*Obj, Symbol) == ELF::STT_SECTION)
+      continue;
+
+    // Get the section the symbol is defined in.
+    object::section_iterator SectionI = unwrapOrError(Symbol.getSection());
+
+    // If the section iterator does not point to the end of the section
+    // list, then the symbol is defined in a section.
+    if (SectionI != Obj->section_end())
+      AllSymbols[*SectionI].push_back(createSymbolInfo(*Obj, Symbol));
+    else
+      UndefinedSymbols.push_back(createSymbolInfo(*Obj, Symbol));
+  }
+
+  // Sort the symbols.
+  for (std::pair<const object::SectionRef, SectionSymbolsTy> &SortSymbols :
+       AllSymbols)
+    llvm::stable_sort(SortSymbols.second);
+  llvm::stable_sort(UndefinedSymbols);
+
+  // Lowest address to disassemble from.
+  uint64_t StartAddr = 0;
+
+  // Read the basic block address map (ELF objects only), keyed by the
+  // function address of each entry.
+  std::unordered_map<uint64_t, object::BBAddrMap> BBAddrMap;
+  if (const auto *Elf = dyn_cast<object::ELFObjectFileBase>(Obj)) {
+    auto BBAddrMappingOrErr = Elf->readBBAddrMap();
+    ExitIf(!BBAddrMappingOrErr, "failed to read basic block address map");
+    for (auto &BBAddr : *BBAddrMappingOrErr)
+      BBAddrMap.emplace(BBAddr.Addr, std::move(BBAddr));
+  }
+
+  // Begin iterating over the sections.
+  for (const object::SectionRef &Section :
+       toolSectionFilter(*Obj, nullptr, FilterSections)) {
+    // Without an explicit filter only non-virtual text sections are processed.
+    if (FilterSections.empty() && (!Section.isText() || Section.isVirtual()))
+      continue;
+
+    const uint64_t SectionAddr = Section.getAddress();
+    const uint64_t SectionSize = Section.getSize();
+    if (!SectionSize)
+      continue;
+
+    // Get all the symbols in the section - these were sorted earlier.
+    SectionSymbolsTy &SortedSymbols = AllSymbols[Section];
+
+    ArrayRef<uint8_t> Bytes =
+        arrayRefFromStringRef(unwrapOrError(Section.getContents()));
+
+    SmallString<40> Comments;
+    raw_svector_ostream CommentStream(Comments);
+
+    // Start retrieving the MCInsts.
+    for (size_t SI = 0, SE = SortedSymbols.size(); SI != SE;) {
+      // Find all symbols at the same address by advancing SI until the
+      // address changes; this relies on the sort performed above.
+      const size_t FirstSI = SI;
+      uint64_t Start = SortedSymbols[SI].Addr;
+      while (SI != SE && SortedSymbols[SI].Addr == Start)
+        ++SI;
+
+      // End is the end of the current location: the start of the next symbol,
+      // or the end of the section for the last group.
+      uint64_t End =
+          SI < SE ? SortedSymbols[SI].Addr : SectionAddr + SectionSize;
+
+      // The aliases are the symbols that have the same address.
+      ArrayRef<SymbolInfoTy> Aliases(&SortedSymbols[FirstSI], SI - FirstSI);
+
+      // If the symbol range does not overlap with our section,
+      // move to the next symbol.
+      if (Start >= End || End <= StartAddr)
+        continue;
+
+      // Adjust the start and end addresses to be relative to the start of the
+      // section.
+      Start -= SectionAddr;
+      End -= SectionAddr;
+
+      std::unordered_map<uint64_t, std::vector<std::string>> BBtoAddressLabels;
+      collectBBtoAddressLabels(BBAddrMap, SectionAddr, Start, End,
+                               BBtoAddressLabels);
+
+      printFunction(Aliases);
+
+      uint64_t Index = Start;
+      if (SectionAddr < StartAddr)
+        Index = std::max<uint64_t>(Index, StartAddr - SectionAddr);
+
+      // Make sure to keep track of the number of instructions.
+      int NumInstructions = 0;
+
+      processInsts(DisAsm, SectionAddr, Bytes, CommentStream, Start, End, Index,
+                   NumInstructions, BBtoAddressLabels, Check64Bits);
+
+      // The disassembler comments are never printed; drop them so the buffer
+      // does not grow with every symbol.
+      Comments.clear();
+
+      outs() << "# of instructions: " << NumInstructions << "\n";
+    }
+  }
+  return 0;
+}