diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -180,6 +180,7 @@ "llvm-addr2line", "llvm-bcanalyzer", "llvm-bitcode-strip", + "llvm-cm", "llvm-config", "llvm-cov", "llvm-cxxdump", diff --git a/llvm/test/tools/llvm-cm/inst_count.ll b/llvm/test/tools/llvm-cm/inst_count.ll new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-cm/inst_count.ll @@ -0,0 +1,27 @@ +; REQUIRES: x86_64-linux +; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t.o --filetype=obj -basic-block-sections=labels +; RUN: llvm-cm %t.o 2>&1 | FileCheck %s + +define i32 @main(i32 %0) { + %r = add i32 %0, 1 + ret i32 %r +} + +define i32 @multiply(i32 %a, i32 %b) { + %result = mul i32 %a, %b + ret i32 %result +} + +define i32 @abs_val(i32 %a) { + %t = icmp sgt i32 %a, 0 + br i1 %t, label %if.then, label %if.else +if.then: + ret i32 %a +if.else: + %neg = sub i32 0, %a + ret i32 %neg +} + +; CHECK: # of instructions: 4 +; CHECK: # of instructions: 3 + diff --git a/llvm/tools/llvm-cm/CMakeLists.txt b/llvm/tools/llvm-cm/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/CMakeLists.txt @@ -0,0 +1,20 @@ +#include_directories(include) + +set (LLVM_LINK_COMPONENTS + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCDisassembler + Object + Option + Support + TargetParser + ) + +add_llvm_tool(llvm-cm + llvm-cm.cpp +) + +#set(LLVM_CM_SOURCE_DIR ${CURRENT_SOURCE_DIR}) + diff --git a/llvm/tools/llvm-cm/llvm-cm.cpp b/llvm/tools/llvm-cm/llvm-cm.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-cm/llvm-cm.cpp @@ -0,0 +1,386 @@ +//===- llvm-cm.cpp - LLVM cost modeling tool ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------------===// +// +// llvm-cm is a tool for native cost model evaluation. +// +//===--------------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +// Define the command line options +static cl::opt InputFilename(cl::Positional, cl::desc(""), cl::init("-"), cl::Required); +static cl::opt TripleName("triple", + cl::desc("Target triple name. " + "See -version for available targets."), + cl::init(LLVM_DEFAULT_TARGET_TRIPLE), + cl::value_desc("triple")); +static cl::opt CPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::init("skylake"), + cl::value_desc("cpu-name")); +#define EXIT_IF(COND, MSG) \ + do { \ + if (!(COND)) \ + break; \ + (MSG); \ + exit(1); \ + } while (false) + +struct FilterResult { + // True if the section should not be skipped. + bool Keep = false; + + // True if the index counter should be incremented, even if the section should + // be skipped. For example, sections may be skipped if they are not included + // in the --section flag, but we still want those to count toward the section + // count. + bool IncrementIndex = false; +}; + +static FilterResult +checkSectionFilter(object::SectionRef S, StringSet<> FoundSectionSet, + std::vector FilterSections) { + if (FilterSections.empty()) + return {/*Keep=*/true, /*IncrementIndex=*/true}; + + Expected SecNameOrErr = S.getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); + return {/*Keep=*/false, /*IncrementIndex=*/false}; + } + StringRef SecName = *SecNameOrErr; + + // StringSet does not allow empty key so avoid adding sections with + // no name (such as the section with index 0) here. + if (!SecName.empty()) + FoundSectionSet.insert(SecName); + + // Only show the section if it's in the FilterSections list, but always + // increment so the indexing is stable. + return {/*Keep=*/is_contained(FilterSections, SecName), + /*IncrementIndex=*/true}; +} + +llvm::object::SectionFilter +toolSectionFilter(object::ObjectFile const &O, uint64_t *Idx, + std::vector FilterSections) { + StringSet<> FoundSectionSet; + if (Idx) + *Idx = UINT64_MAX; + return llvm::object::SectionFilter( + /*Pred=*/[Idx, FoundSectionSet, FilterSections](object::SectionRef S) { + FilterResult Result = + checkSectionFilter(S, FoundSectionSet, FilterSections); + if (Idx != nullptr && Result.IncrementIndex) + *Idx += 1; + return Result.Keep; + }, + /*Obj=*/O); +} + +// Implement the "error" function +[[noreturn]] static void error(Error Err) { + logAllUnhandledErrors(std::move(Err), WithColor::error(outs()), + "reading file: "); + outs().flush(); + exit(1); +} + +template +T unwrapOrError(Expected EO) { + if (!EO) + error(EO.takeError()); + return std::move(*EO); +} + +// TODO: Share this with llvm-objdump.cpp and +static uint8_t getElfSymbolType(const llvm::object::ObjectFile &Obj, const llvm::object::SymbolRef &Sym) { + assert(Obj.isELF()); + if (auto *Elf32LEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf64LEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf32BEObj = dyn_cast(&Obj)) + return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + if (auto *Elf64BEObj = cast(&Obj)) + return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())) + ->getType(); + llvm_unreachable("Unsupported binary format"); +} + +// TODO: Share this with llvm-objdump.cpp +SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj, const object::SymbolRef Symbol) { + const uint64_t Addr = unwrapOrError(Symbol.getAddress()); + const StringRef SymName = unwrapOrError(Symbol.getName()); + return SymbolInfoTy(Addr, SymName, Obj.isELF() ? getElfSymbolType(Obj, Symbol) + : (uint8_t)ELF::STT_NOTYPE); + +} + +// Rewrite the printFunction function to only take in aliases +void printFunction(ArrayRef &Aliases) { + for (size_t I = 0; I < Aliases.size(); ++I) { + const StringRef SymbolName = Aliases[I].Name; + outs() << SymbolName << ": "; + } +} + +void processInsts(std::unique_ptr &DisAsm, + const uint64_t &SectionAddr, ArrayRef &Bytes, + raw_svector_ostream &CommentStream, uint64_t &Start, + uint64_t &End, uint64_t &Index, int &NumInstructions) { + while (Index < End) { + MCInst Inst; + uint64_t Size = 0; + ArrayRef BytesSlice = Bytes.slice(Index); + uint64_t CurrAddr = SectionAddr + Index; + EXIT_IF(!DisAsm->getInstruction(Inst, Size, BytesSlice, CurrAddr, + CommentStream), + errs() << "error: getInstruction() failed\n"); + // If the instrcution is the first intruction in the symbol, then print its + // address + if (Index == Start) { + outs() << format("%8" PRIx64 ":", CurrAddr); + } + ++NumInstructions; + if (Size == 0) { + Size = std::min( + BytesSlice.size(), DisAsm->suggestBytesToSkip(BytesSlice, CurrAddr)); + } + Index += Size; + } +} + + +int main(int argc, char *argv[]) { + InitLLVM X(argc, argv); + + // Parse the command line options + cl::ParseCommandLineOptions(argc, argv, "llvm cost model tool\n"); + + // Set up the triple and target features + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllDisassemblers(); + + object::OwningBinary BinaryOrErr = + unwrapOrError(object::createBinary(InputFilename)); + + object::Binary &Binary = *BinaryOrErr.getBinary(); + + // get the object file from the binary + object::ObjectFile *Obj = dyn_cast(&Binary); + + // Get the Target + std::string Error; + + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + + std::vector MAttrs; + + Expected Features = Obj->getFeatures(); + + EXIT_IF(!Features, errs() << "error: Features not defined" + << "\n"); + + SubtargetFeatures TrueFeatures = *Features; + + + for (unsigned I = 0; I != MAttrs.size(); ++I) { + TrueFeatures.AddFeature(MAttrs[I]); + } + + + // Start setting up the disassembler + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + EXIT_IF(!MRI, errs() << "error: no register info for target " << TripleName + << "\n"); + + MCTargetOptions MCOptions; + + std::unique_ptr AsmInfo( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); + EXIT_IF(!AsmInfo, errs() << "error: no assembly info for target " + << TripleName << "\n"); + + std::unique_ptr SubInfo(TheTarget->createMCSubtargetInfo( + TripleName, CPU, TrueFeatures.getString())); + EXIT_IF(!SubInfo, errs() << "error: no subtarget info for target " + << TripleName << "\n"); + + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + + EXIT_IF(!MII, errs() << "error: no instruction info for target " << TripleName + << "\n"); + + MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), SubInfo.get()); + + std::unique_ptr MOFI( + TheTarget->createMCObjectFileInfo(Ctx, false)); + Ctx.setObjectFileInfo(MOFI.get()); + + std::unique_ptr DisAsm( + TheTarget->createMCDisassembler(*SubInfo, Ctx)); + + EXIT_IF(!DisAsm, errs() << "error: no disassembler for target " + << TripleName << "\n"); + + + + std::map AllSymbols; + SectionSymbolsTy UndefinedSymbols; + + std::vector FilterSections; + + // Get the symbol table + for (const object::SymbolRef &Symbol : Obj->symbols()) { + Expected NameOrErr = Symbol.getName(); + EXIT_IF(!NameOrErr, errs() << "error: " << NameOrErr.takeError() << "\n"); + + // If the symbol is a section symbol, then ignore it. + if (Obj->isELF() && getElfSymbolType(*Obj, Symbol) == ELF::STT_SECTION) + continue; + + // Get the section the symbol is defined in + object::section_iterator SectionI = unwrapOrError(Symbol.getSection()); + + // If the section iterator does not point to the end of the section + // list, then the symbol is defined in a section + if (SectionI != Obj->section_end()) { + AllSymbols[*SectionI].push_back(createSymbolInfo(*Obj, Symbol)); + } else { + UndefinedSymbols.push_back(createSymbolInfo(*Obj, Symbol)); + } + } + + // Sort the symbols + for (std::pair &SortSymbols : + AllSymbols) { + llvm::stable_sort(SortSymbols.second); + } + llvm::stable_sort(UndefinedSymbols); + + uint64_t StartAddr = 0; + + // Begin iterating over the sections + for (const object::SectionRef &Section : + toolSectionFilter(*Obj, nullptr, FilterSections)) { + if (FilterSections.empty() && (!Section.isText() || Section.isVirtual())) { + continue; + } + const uint64_t SectionAddr = Section.getAddress(); + const uint64_t SectionSize = Section.getSize(); + + if (!SectionSize) { + continue; + } + + // Get all the symbols in the section - these were sorted earlier + SectionSymbolsTy &SortedSymbols = AllSymbols[Section]; + + ArrayRef Bytes = + arrayRefFromStringRef(unwrapOrError(Section.getContents())); + + SmallString<40> Comments; + raw_svector_ostream CommentStream(Comments); + + + + // Start retrieving the MCInsts + for (size_t SI = 0, SE = SortedSymbols.size(); SI != SE;) { + + // Find all symbols in the same "location" by incrementing over + // SI until the starting address changes. The sorted symbols were sorted by address. + const size_t FirstSI = SI; + uint64_t Start = SortedSymbols[SI].Addr; + + // If the current symbol's address is the same as the previous + // symbol's address, then we know that the current symbol is an + // alias, and we skip it. + ArrayRef Aliases; + while (SI != SE && SortedSymbols[SI].Addr == Start) + ++SI; + + // End is the end of the current location, the start of the next symbol + uint64_t End = + SI < SE ? SortedSymbols[SI].Addr : SectionAddr + SectionSize; + + + + + // The aliases are the symbols that have the same address + Aliases = ArrayRef(&SortedSymbols[FirstSI], SI - FirstSI); + + // If the symbol range does not overlap with our section, + // move to the next symbol + if (Start >= End || End <= StartAddr) + continue; + + // Adjust the start and end addresses to be relative to the start of the section + Start -= SectionAddr; + End -= SectionAddr; + + printFunction(Aliases); + + uint64_t Index = Start; + if (SectionAddr < StartAddr) + Index = std::max(Index, StartAddr - SectionAddr); + + // Make sure to keep track of the number of instructions + int NumInstructions = 0; + + processInsts(DisAsm, SectionAddr, Bytes, CommentStream, Start, End, Index, + NumInstructions); + + outs() << "# of instructions: " << NumInstructions << "\n"; + } + } + return 0; +}