diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -180,6 +180,7 @@ "llvm-addr2line", "llvm-bcanalyzer", "llvm-bitcode-strip", + "llvm-cm", "llvm-config", "llvm-cov", "llvm-cxxdump", diff --git a/llvm/test/tools/llvm-cm/inst_count.ll b/llvm/test/tools/llvm-cm/inst_count.ll new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/inst_count.ll @@ -0,0 +1,16 @@ +; REQUIRES: x86_64-linux +; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t.o --filetype=obj +; RUN: llvm-cm %t.o 2>&1 | FileCheck %s + +define i32 @func1(i32 %0) { + %r = add i32 %0, 1 + ret i32 %r +} + +define i32 @multiply(i32 %a, i32 %b) { + %result = mul i32 %a, %b + ret i32 %result +} + +; CHECK: Number of instructions: 4 +; CHECK: Number of instructions: 3 \ No newline at end of file diff --git a/llvm/tools/llvm-cm/CMakeLists.txt b/llvm/tools/llvm-cm/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/CMakeLists.txt @@ -0,0 +1,19 @@ +#include_directories(include) + +set (LLVM_LINK_COMPONENTS + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCDisassembler + Object + Option + Support + TargetParser + ) + +add_llvm_tool(llvm-cm + llvm-cm.cpp +) + +#set(LLVM_CM_SOURCE_DIR ${CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/llvm/tools/llvm-cm/llvm-cm.cpp b/llvm/tools/llvm-cm/llvm-cm.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/llvm-cm.cpp @@ -0,0 +1,409 @@ +//===- llvm-cm.cpp - LLVM cost modeling tool ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------------===// +// +// llvm-cm is a tool for native cost model evaluation. 
+//
+//===--------------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+// Address window that is analysed. Nothing in this file changes these values,
+// so the whole address space is covered.
+static uint64_t StartAddr = 0;
+static uint64_t StopAddr = UINT64_MAX;
+
+// Section names the analysis is restricted to. Nothing in this file populates
+// the list yet; while it is empty checkSectionFilter keeps every section.
+static std::vector<std::string> FilterSections;
+
+// Names of the sections seen by checkSectionFilter while a filter is active.
+static StringSet<> FoundSectionSet;
+
+// Command line options.
+static cl::opt<std::string> InputFilename(cl::Positional,
+                                          cl::desc("<input file>"),
+                                          cl::init("-"), cl::Required);
+static cl::opt<std::string>
+    TripleName("triple",
+               cl::desc("Target triple name. "
+                        "See -version for available targets."),
+               cl::init(LLVM_DEFAULT_TARGET_TRIPLE),
+               cl::value_desc("triple"));
+static cl::opt<std::string>
+    CPU("mcpu",
+        cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+        cl::init("skylake"), cl::value_desc("cpu-name"));
+
+struct FilterResult {
+  // True if the section should not be skipped.
+  bool Keep;
+
+  // True if the index counter should be incremented, even if the section
+  // should be skipped. For example, sections may be skipped if they are not
+  // included in the --section flag, but we still want those to count toward
+  // the section count.
+  bool IncrementIndex;
+};
+
+// Decides whether section S passes the FilterSections name filter and whether
+// it counts toward the running section index. A section whose name cannot be
+// read is dropped and not counted; its error is consumed.
+static FilterResult checkSectionFilter(object::SectionRef S) {
+  if (FilterSections.empty())
+    return {/*Keep=*/true, /*IncrementIndex=*/true};
+
+  Expected<StringRef> SecNameOrErr = S.getName();
+  if (!SecNameOrErr) {
+    consumeError(SecNameOrErr.takeError());
+    return {/*Keep=*/false, /*IncrementIndex=*/false};
+  }
+  const StringRef SecName = *SecNameOrErr;
+
+  // StringSet does not allow an empty key, so sections with no name (such as
+  // the section with index 0) are not recorded.
+  if (!SecName.empty())
+    FoundSectionSet.insert(SecName);
+
+  // Only keep the section if it is in the FilterSections list, but always
+  // increment so the indexing is stable.
+  return {/*Keep=*/is_contained(FilterSections, SecName),
+          /*IncrementIndex=*/true};
+}
+
+// Returns a filtered view over the sections of O that yields only the sections
+// accepted by checkSectionFilter. If Idx is non-null it is reset to UINT64_MAX
+// and then incremented for every counted section, so the first counted section
+// wraps it around to 0.
+// NOTE(review): none of the headers included above obviously declares
+// llvm::object::SectionFilter - confirm where it is expected to come from.
+static llvm::object::SectionFilter
+toolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) {
+  if (Idx)
+    *Idx = UINT64_MAX;
+  return llvm::object::SectionFilter(
+      [Idx](object::SectionRef S) {
+        const FilterResult Result = checkSectionFilter(S);
+        if (Idx != nullptr && Result.IncrementIndex)
+          *Idx += 1;
+        return Result.Keep;
+      },
+      O);
+}
+
+// Reports Err on stderr with the "reading file: " banner and exits with
+// status 1.
+[[noreturn]] static void error(Error Err) {
+  // Flush buffered report output first so it is not emitted after the
+  // diagnostic.
+  outs().flush();
+  logAllUnhandledErrors(std::move(Err), WithColor::error(errs()),
+                        "reading file: ");
+  exit(1);
+}
+
+// Returns the value held by EO, or reports the error it carries and exits.
+template <typename T> static T unwrapOrError(Expected<T> EO) {
+  if (!EO)
+    error(EO.takeError());
+  return std::move(*EO);
+}
+
+// Returns the ELF symbol type (an ELF::STT_* value) of Sym. Obj must be one of
+// the four ELF object file flavours.
+static uint8_t getElfSymbolType(const llvm::object::ObjectFile &Obj,
+                                const llvm::object::SymbolRef &Sym) {
+  assert(Obj.isELF());
+  if (auto *Elf32LEObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
+    return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  if (auto *Elf64LEObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
+    return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  if (auto *Elf32BEObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
+    return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  if (auto *Elf64BEObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
+    return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()))
+        ->getType();
+  llvm_unreachable("Unsupported binary format");
+}
+
+// Builds the SymbolInfoTy (address, name, type) for Symbol. Non-ELF objects
+// get ELF::STT_NOTYPE as their type. Exits if the address or the name cannot
+// be read.
+static SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj,
+                                     const object::SymbolRef &Symbol) {
+  const uint64_t Addr = unwrapOrError(Symbol.getAddress());
+  const StringRef SymName = unwrapOrError(Symbol.getName());
+  const uint8_t Type = Obj.isELF() ? getElfSymbolType(Obj, Symbol)
+                                   : static_cast<uint8_t>(ELF::STT_NOTYPE);
+  return SymbolInfoTy(Addr, SymName, Type);
+}
+
+// Decodes the section-relative byte range [Index, End) of Bytes and returns
+// the number of instructions that were decoded. Decoding stops, with a
+// warning, at the first invalid encoding; that encoding is not counted.
+static uint64_t countInstructions(const MCDisassembler &DisAsm,
+                                  ArrayRef<uint8_t> Bytes,
+                                  uint64_t SectionAddr, uint64_t Index,
+                                  uint64_t End) {
+  uint64_t NumInstructions = 0;
+  while (Index < End) {
+    MCInst Inst;
+    uint64_t Size = 0;
+    const ArrayRef<uint8_t> Slice = Bytes.slice(Index);
+    const uint64_t Addr = SectionAddr + Index;
+
+    // Disassembler comments are not needed, so they go to the null stream.
+    if (DisAsm.getInstruction(Inst, Size, Slice, Addr, nulls()) ==
+        MCDisassembler::Fail) {
+      WithColor::warning() << "invalid instruction encoding\n";
+      break;
+    }
+    ++NumInstructions;
+
+    // Make sure the loop advances even if the decoder reported no size.
+    if (Size == 0)
+      Size = std::min<uint64_t>(Slice.size(),
+                                DisAsm.suggestBytesToSkip(Slice, Addr));
+    Index += Size;
+  }
+  return NumInstructions;
+}
+
+// Disassembles every symbol of the input object file and prints, per symbol
+// address, the symbol name(s) followed by the number of instructions found up
+// to the next symbol (or the end of the section).
+int main(int argc, char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm cost model tool\n");
+
+  InitializeAllTargetInfos();
+  InitializeAllTargetMCs();
+  InitializeAllDisassemblers();
+
+  object::OwningBinary<object::Binary> OwnedBinary =
+      unwrapOrError(object::createBinary(InputFilename));
+
+  // createBinary also accepts archives and other containers; only plain
+  // object files can be analysed here.
+  auto *Obj = dyn_cast<object::ObjectFile>(OwnedBinary.getBinary());
+  if (!Obj) {
+    WithColor::error() << "'" << InputFilename << "' is not an object file\n";
+    return 1;
+  }
+
+  // The target is selected from --triple (default: LLVM_DEFAULT_TARGET_TRIPLE),
+  // not from the object file itself.
+  std::string LookupError;
+  const Target *TheTarget =
+      TargetRegistry::lookupTarget(TripleName, LookupError);
+  if (!TheTarget) {
+    errs() << argv[0] << ": " << LookupError << "\n";
+    return 1;
+  }
+
+  // Start from the features recorded in the object file. No option populates
+  // MAttrs yet; once one does, its entries are appended here.
+  SubtargetFeatures Features = unwrapOrError(Obj->getFeatures());
+  std::vector<std::string> MAttrs;
+  for (const std::string &Attr : MAttrs)
+    Features.AddFeature(Attr);
+
+  // Set up the MC layer objects the disassembler depends on.
+  // WithColor::error() already prints the "error: " prefix.
+  std::unique_ptr<const MCRegisterInfo> MRI(
+      TheTarget->createMCRegInfo(TripleName));
+  if (!MRI) {
+    WithColor::error() << "no register info for target " << TripleName
+                       << "\n";
+    return 1;
+  }
+
+  MCTargetOptions MCOptions;
+  std::unique_ptr<const MCAsmInfo> AsmInfo(
+      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+  if (!AsmInfo) {
+    WithColor::error() << "no assembly info for target " << TripleName
+                       << "\n";
+    return 1;
+  }
+
+  std::unique_ptr<const MCSubtargetInfo> SubInfo(
+      TheTarget->createMCSubtargetInfo(TripleName, CPU,
+                                       Features.getString()));
+  if (!SubInfo) {
+    WithColor::error() << "no subtarget info for target " << TripleName
+                       << "\n";
+    return 1;
+  }
+
+  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+  if (!MII) {
+    WithColor::error() << "no instruction info for target " << TripleName
+                       << "\n";
+    return 1;
+  }
+
+  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), SubInfo.get());
+
+  std::unique_ptr<MCObjectFileInfo> MOFI(
+      TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
+  Ctx.setObjectFileInfo(MOFI.get());
+
+  std::unique_ptr<MCDisassembler> DisAsm(
+      TheTarget->createMCDisassembler(*SubInfo, Ctx));
+  if (!DisAsm) {
+    WithColor::error() << "no disassembler for target " << TripleName << "\n";
+    return 1;
+  }
+
+  std::unique_ptr<const MCInstrAnalysis> MIA(
+      TheTarget->createMCInstrAnalysis(MII.get()));
+
+  // The instruction printer is only configured here; nothing is printed
+  // through it yet.
+  const int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
+  if (!IP) {
+    WithColor::error() << "no instruction printer for target " << TripleName
+                       << '\n';
+    return 1;
+  }
+  IP->setPrintImmHex(true);
+  IP->setPrintBranchImmAsAddress(true);
+  IP->setSymbolizeOperands(false);
+  IP->setMCInstrAnalysis(MIA.get());
+
+  // Bucket the symbols by the section that defines them.
+  std::map<object::SectionRef, SectionSymbolsTy> AllSymbols;
+  SectionSymbolsTy UndefinedSymbols;
+  for (const object::SymbolRef &Symbol : Obj->symbols()) {
+    // Reject symbols whose name cannot be read before looking at anything
+    // else.
+    Expected<StringRef> NameOrErr = Symbol.getName();
+    if (!NameOrErr)
+      error(NameOrErr.takeError());
+
+    // Section symbols do not mark code, so they are ignored.
+    if (Obj->isELF() && getElfSymbolType(*Obj, Symbol) == ELF::STT_SECTION)
+      continue;
+
+    object::section_iterator SectionI = unwrapOrError(Symbol.getSection());
+    if (SectionI != Obj->section_end())
+      AllSymbols[*SectionI].push_back(createSymbolInfo(*Obj, Symbol));
+    else
+      UndefinedSymbols.push_back(createSymbolInfo(*Obj, Symbol));
+  }
+
+  // The walk below relies on each section's symbols being sorted.
+  for (auto &SectionAndSymbols : AllSymbols)
+    llvm::stable_sort(SectionAndSymbols.second);
+  llvm::stable_sort(UndefinedSymbols);
+
+  for (const object::SectionRef &Section : toolSectionFilter(*Obj, nullptr)) {
+    // Without an explicit section filter only non-virtual text sections are
+    // analysed.
+    if (FilterSections.empty() && (!Section.isText() || Section.isVirtual()))
+      continue;
+
+    const uint64_t SectionAddr = Section.getAddress();
+    const uint64_t SectionSize = Section.getSize();
+    if (SectionSize == 0)
+      continue;
+
+    SectionSymbolsTy &Symbols = AllSymbols[Section];
+    const ArrayRef<uint8_t> Bytes =
+        arrayRefFromStringRef(unwrapOrError(Section.getContents()));
+    const StringRef SectionName = unwrapOrError(Section.getName());
+
+    // The section header is printed lazily, before the first reported symbol.
+    bool PrintedSectionHeader = false;
+
+    for (size_t SI = 0, SE = Symbols.size(); SI != SE;) {
+      // Group all symbols that share the same address.
+      const size_t FirstSI = SI;
+      uint64_t Start = Symbols[SI].Addr;
+      while (SI != SE && Symbols[SI].Addr == Start)
+        ++SI;
+      const ArrayRef<SymbolInfoTy> SymbolsHere(&Symbols[FirstSI],
+                                               SI - FirstSI);
+
+      // The group extends to the next symbol, the end of the section or
+      // StopAddr, whichever comes first.
+      uint64_t End = std::min<uint64_t>(SectionAddr + SectionSize, StopAddr);
+      if (SI < SE)
+        End = std::min<uint64_t>(End, Symbols[SI].Addr);
+      if (Start >= End || End <= StartAddr)
+        continue;
+
+      // From here on Start and End are offsets into the section contents.
+      Start -= SectionAddr;
+      End -= SectionAddr;
+
+      if (!PrintedSectionHeader) {
+        PrintedSectionHeader = true;
+        outs() << "\nCurrent Section: " << SectionName << "\n";
+      }
+
+      outs() << "\n";
+      for (const SymbolInfoTy &Symbol : SymbolsHere)
+        outs() << Symbol.Name << ": ";
+
+      uint64_t Index = Start;
+      if (SectionAddr < StartAddr)
+        Index = std::max<uint64_t>(Index, StartAddr - SectionAddr);
+
+      const uint64_t NumInstructions =
+          countInstructions(*DisAsm, Bytes, SectionAddr, Index, End);
+      outs() << "Number of instructions: " << NumInstructions << "\n";
+    }
+  }
+  return 0;
+}