diff --git a/lnt/testing/profile/cPerf.cpp b/lnt/testing/profile/cPerf.cpp index 9d8233c..38b1ff8 100644 --- a/lnt/testing/profile/cPerf.cpp +++ b/lnt/testing/profile/cPerf.cpp @@ -1,1117 +1,1125 @@ //===-cPerf.cpp - Linux perf.data parsing ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This is a python module that reads perf.data files generated by Linux Perf. // Writing efficiently-executing Python that reads and manipulates binary files // is difficult and results in code that looks very un-Python-like. // // Also, parsing profiles is on the critical path for result submission and // often has to run on target devices that may not be as fast as a desktop // machine (an example is a pandaboard - people still test on Cortex-A9). // // This algorithm aims to be faster than 'perf report' or 'perf annotate'. // In experiments this module is around 6x faster than 'perf annotate' - more // when we reduce the amount of data imported (see later). // // Algorithm // --------- // // We start by mmapping the perf.data file and reading the header, event // attribute entries and the sample data stream. Every sample is aggregated into // a set of counters counting every event for every PC location (indexed by mmap // ID - a data stream can contain samples for the same PC in different binaries // e.g. if exec() is called). Total counters are also kept for the entire stream // and per-mmap. // // The source for the perf.data format is here: https://lwn.net/Articles/644919/ // and the perf_events manpage. // // Once the aggregation is done, we do a single pass through the event data: // // for each mmap: // if the mmap's event total is < 1% of the total in all counters, // then discard the mmap [1]. 
// // load symbol data by calling "objdump -t" and parsing the result. // for all PCs we have events for, in sorted order: // find the symbol this PC is part of -> Sym // look up all PCs between Sym.Start and Sym.End and emit data // // The output of this module (cPerf.importPerf) is a dictionary in (almost) // ProfileV1 form (see profile.py and profilev1impl.py). The only difference is // that all counters are absolute. // // [1]: Perf will start sampling from the moment the perf wrapper tool is // invoked, and its samples will continue until the perf wrapper tool exits. // This means that it will often take one or two samples in intermediate // binaries like "perf", "bash", or libraries such as libdl. We don't care about // these, and these binaries and libraries are very large to objdump. // // So we have a threshold - if a binary contains < 1% of all samples, don't // bother importing it. // //===----------------------------------------------------------------------===// #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #define _CRT_SECURE_NO_WARNINGS #define strtok_r strtok_s #include #include typedef SSIZE_T ssize_t; #define PROT_EXEC 4 #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _WIN32 #include #include #include #endif #include #include //===----------------------------------------------------------------------===// // Helpers //===----------------------------------------------------------------------===// #ifndef STANDALONE #ifdef assert #undef assert #endif // Redefine assert so we can use it without destroying the Python interpreter!
#define assert(expr) Assert((expr), #expr, __FILE__, __LINE__)
#endif

// Reads a native-endian uint32_t from the front of Buf and advances Buf past
// it. memcpy is used instead of a pointer cast so that the read is well
// defined even when Buf is not 4-byte aligned.
uint32_t TakeU32(unsigned char *&Buf) {
  uint32_t Value;
  memcpy(&Value, Buf, sizeof(Value));
  Buf += sizeof(Value);
  return Value;
}

// Reads a native-endian uint64_t from the front of Buf and advances Buf past
// it. See TakeU32 for why memcpy is used.
uint64_t TakeU64(unsigned char *&Buf) {
  uint64_t Value;
  memcpy(&Value, Buf, sizeof(Value));
  Buf += sizeof(Value);
  return Value;
}

// Runs Cmd under a shell and returns a stream reading the command's stdout.
//
// On POSIX the caller owns the stream and the child: it must fclose() the
// stream and wait() for the child. On Windows the stream comes from _popen()
// and must be closed with _pclose().
//
// Throws std::runtime_error if the pipe, the child process or the stream
// cannot be created.
FILE *ForkAndExec(std::string Cmd) {
#ifdef _WIN32
  Cmd = "cmd.exe /c " + Cmd;
  FILE *Stream = _popen(Cmd.c_str(), "rt");
  if (Stream == nullptr)
    throw std::runtime_error("unable to run command: " + Cmd);
#else
  int P[2];
  if (pipe(P) != 0)
    throw std::runtime_error("pipe() failed while running: " + Cmd);

  pid_t Child = fork();
  if (Child < 0) {
    close(P[0]);
    close(P[1]);
    throw std::runtime_error("fork() failed while running: " + Cmd);
  }

  if (Child == 0) {
    // Child: route stdout into the pipe and drop the now-redundant fds.
    dup2(P[1], 1);
    close(P[0]);
    close(P[1]);
    // The variadic sentinel must be a null *pointer*; a plain 0 is an int.
    execl("/bin/sh", "sh", "-c", Cmd.c_str(), (char *)NULL);
    // Only reached if exec failed. Leave immediately so the child never
    // returns into (a copy of) the caller's code.
    _exit(127);
  }

  // Parent: keep only the read end.
  close(P[1]);
  FILE *Stream = fdopen(P[0], "r");
  if (Stream == nullptr) {
    close(P[0]);
    waitpid(Child, nullptr, 0);
    throw std::runtime_error("fdopen() failed while running: " + Cmd);
  }
#endif
  return Stream;
}

#ifdef _WIN32
// Minimal replacement for POSIX getline(): reads one line (including the
// trailing '\n', if any) from stream into *lineptr, growing the malloc'ed
// buffer in 128-byte steps. Returns the number of characters read, or -1 on
// EOF, invalid arguments or allocation failure.
ssize_t getline(char** lineptr, size_t* n, FILE* stream) {
  if (lineptr == nullptr || stream == nullptr || n == nullptr)
    return -1;

  int Ch = fgetc(stream);
  if (Ch == EOF)
    return -1;

  char *Base = *lineptr;
  size_t Capacity = *n;
  if (Base == nullptr) {
    Capacity = 128;
    Base = (char *)malloc(Capacity);
    if (Base == nullptr)
      return -1;
    *lineptr = Base;
  }

  char *Out = Base;
  for (; Ch != EOF; Ch = fgetc(stream)) {
    const size_t Used = (size_t)(Out - Base);
    // Always keep room for this character plus the terminating NUL.
    if (Used + 2 > Capacity) {
      Capacity += 128;
      Base = (char *)realloc(Base, Capacity);
      if (Base == nullptr)
        return -1;
      *lineptr = Base;
      Out = Base + Used;
    }
    *Out++ = (char)Ch;
    if (Ch == '\n')
      break;
  }

  *Out = '\0';
  *n = Capacity;
  return Out - Base;
}
#endif

// Backend of the redefined assert() macro: does nothing when Expr is true,
// otherwise throws std::logic_error carrying "<expr> (<file>:<line>)".
void Assert(bool Expr, const char *ExprStr, const char *File, int Line) {
  if (Expr)
    return;
  char Str[512];
  // snprintf truncates rather than overflowing on a long expression or path.
  snprintf(Str, sizeof(Str), "%s (%s:%d)", ExprStr, File, Line);
  throw std::logic_error(Str);
}

// Returns true if the file named Fname is an ELF shared object (e_type is
// ET_DYN). Returns false if the file cannot be opened, is too short to hold
// the start of an ELF header, or does not begin with the ELF magic.
//
// e_type is compared in host byte order, so only ELF files of the host's
// endianness are recognised.
bool IsSharedObject(const std::string &Fname) {
  // We replicate the first part of an ELF header here
  // so as not to rely on <elf.h>.
  struct PartialElfHeader {
    unsigned char e_ident[16];
    uint16_t e_type;
  };
  const int ET_DYN = 3;

  // Binary mode: the header must be read byte-for-byte (matters on Windows).
  FILE *Stream = fopen(Fname.c_str(), "rb");
  if (Stream == NULL)
    return false;

  PartialElfHeader H;
  const size_t NumRead = fread(&H, 1, sizeof(H), Stream);
  // Close before inspecting the result so no path leaks the stream.
  fclose(Stream);

  if (NumRead != sizeof(H))
    return false;
  if (memcmp(H.e_ident, "\177ELF", 4) != 0)
    return false;
  return H.e_type == ET_DYN;
}

//===----------------------------------------------------------------------===//
// Perf structures. Taken from https://lwn.net/Articles/644919/
//===----------------------------------------------------------------------===//

// Record types (perf_event_header::type) handled by this module.
#define PERF_RECORD_MMAP 1
#define PERF_RECORD_SAMPLE 9
#define PERF_RECORD_MMAP2 10

// Bits of perf_event_attr::sample_type describing which fields a sample has.
#define PERF_SAMPLE_IP (1U << 0)
#define PERF_SAMPLE_TID (1U << 1)
#define PERF_SAMPLE_TIME (1U << 2)
#define PERF_SAMPLE_ADDR (1U << 3)
#define PERF_SAMPLE_ID (1U << 6)
#define PERF_SAMPLE_CPU (1U << 7)
#define PERF_SAMPLE_PERIOD (1U << 8)
#define PERF_SAMPLE_STREAM_ID (1U << 9)
#define PERF_SAMPLE_IDENTIFIER (1U << 16)

struct perf_file_section {
  uint64_t offset; /* offset from start of file */
  uint64_t size;   /* size of the section */
};

struct perf_header {
  char magic[8];      /* PERFILE2 */
  uint64_t size;      /* size of the header */
  uint64_t attr_size; /* size of an attribute in attrs */
  struct perf_file_section attrs;
  struct perf_file_section data;
  struct perf_file_section event_types;
  uint64_t flags;
  uint64_t flags1[3];
};

struct perf_event_attr {
  uint32_t type;
  uint32_t size;
  uint64_t config;
  uint64_t sample_period;
  uint64_t sample_type;
  uint64_t read_format;
  uint64_t flags;
  uint32_t wakeup_events;
  uint32_t bp_type;
  uint64_t bp_addr;
  uint64_t bp_len;
  uint64_t branch_sample_type;
};

struct perf_event_header {
  uint32_t type;
  uint16_t misc;
  uint16_t size;
};

struct perf_event_sample {
  struct perf_event_header header;

  uint64_t sample_id;
  uint64_t ip;
  uint32_t pid, tid;
  uint64_t time;
  uint64_t id;
  uint64_t period;
};

// Leading fields shared by PERF_RECORD_MMAP and PERF_RECORD_MMAP2 records.
struct perf_event_mmap_common {
  struct perf_event_header header;

  uint32_t pid, tid;
  uint64_t start, extent, pgoff;
};

struct perf_event_mmap {
  struct perf_event_mmap_common mmap_common;

  char filename[1];
};

struct perf_event_mmap2 {
  struct perf_event_mmap_common mmap_common;

  uint32_t maj, min;
  uint64_t ino, ino_generation;
  uint32_t prot, flags;
  char filename[1];
};

struct perf_trace_event_type {
  uint64_t event_id;
  char str[64];
};

enum perf_type_id {
  PERF_TYPE_HARDWARE = 0,
  PERF_TYPE_SOFTWARE = 1,
  PERF_TYPE_TRACEPOINT = 2,
  PERF_TYPE_HW_CACHE = 3,
  PERF_TYPE_RAW = 4,
  PERF_TYPE_BREAKPOINT = 5,

  PERF_TYPE_MAX
};

enum perf_hw_id {
  PERF_COUNT_HW_CPU_CYCLES = 0,
  PERF_COUNT_HW_INSTRUCTIONS = 1,
  PERF_COUNT_HW_CACHE_REFERENCES = 2,
  PERF_COUNT_HW_CACHE_MISSES = 3,
  PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
  PERF_COUNT_HW_BRANCH_MISSES = 5,
  PERF_COUNT_HW_BUS_CYCLES = 6,
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
  PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
  PERF_COUNT_HW_REF_CPU_CYCLES = 9,

  PERF_COUNT_HW_MAX
};

// Event names indexed by perf_hw_id.
static const char* hw_event_names[PERF_COUNT_HW_MAX] = {
  "cycles",
  "instructions",
  "cache-references",
  "cache-misses",
  "branch-instructions",
  "branch-misses",
  "bus-cycles",
  "stalled-cycles-frontend",
  "stalled-cycles-backend",
  "ref-cpu-cycles"
};

enum perf_sw_ids {
  PERF_COUNT_SW_CPU_CLOCK = 0,
  PERF_COUNT_SW_TASK_CLOCK = 1,
  PERF_COUNT_SW_PAGE_FAULTS = 2,
  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
  PERF_COUNT_SW_CPU_MIGRATIONS = 4,
  PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
  PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  PERF_COUNT_SW_EMULATION_FAULTS = 8,

  PERF_COUNT_SW_MAX
};

// Event names indexed by perf_sw_ids.
static const char* sw_event_names[PERF_COUNT_SW_MAX] = {
  "cpu-clock",
  "task-clock",
  "page-faults",
  "context-switches",
  "cpu-migrations",
  "minor-faults",
  "major-faults",
  "alignment-faults",
  "emulation-faults"
};

//===----------------------------------------------------------------------===//
// Readers for objdump output
//===----------------------------------------------------------------------===// struct Map { uint64_t Start, End, Adjust; const char *Filename; }; struct Symbol { uint64_t Start; uint64_t End; std::string Name; bool operator<(const Symbol &Other) const { return Start < Other.Start; } bool operator==(const Symbol &Other) const { return Start == Other.Start && End == Other.End && Name == Other.Name; } }; class SymTabOutput : public std::vector { public: std::string Objdump, BinaryCacheRoot; SymTabOutput(std::string Objdump, std::string BinaryCacheRoot) : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) {} void fetchSymbols(Map *M) { std::string Cmd = Objdump + " -t -T -C " + BinaryCacheRoot + std::string(M->Filename) + #ifdef _WIN32 " 2> NUL"; #else " 2>/dev/null"; #endif auto Stream = ForkAndExec(Cmd); char *Line = nullptr; size_t LineLen = 0; while (true) { ssize_t Len = getline(&Line, &LineLen, Stream); if (Len == -1) break; std::stringstream SS(Line); std::string Start; if (!std::getline(SS, Start, ' ')) continue; char* EndPtr = NULL; uint64_t NStart = strtoull(Start.c_str(), &EndPtr, 16); if (EndPtr == Start.c_str()) continue; char GlobLoc; // Local -> 'l', Global -> 'g', Neither -> ' ' if (!SS.get(GlobLoc)) continue; char Weak; // Weak? if (!SS.get(Weak)) continue; char Space; if (!SS.get(Space)) // Constructor. Not supported yet. continue; if (!SS.get(Space)) // Warning. Not supported yet. continue; char IFunc; // Indirect reference to another symbol. if (!SS.get(IFunc)) continue; char Debug; // Debugging (d) or dynamic (D) symbol. if (!SS.get(Debug)) continue; char FileFunc; // Name of function (F), file (f) or object (O). 
if (!SS.get(FileFunc)) continue; if (FileFunc != 'F') continue; if (!SS.get(Space)) continue; std::string Section; if (!std::getline(SS, Section, '\t')) continue; if (Section != ".text") continue; std::string Extent; if (!std::getline(SS, Extent, ' ')) continue; uint64_t NExtent = strtoull(Extent.c_str(), &EndPtr, 16); if (EndPtr == Extent.c_str()) continue; std::string Func; while (std::getline(SS, Func, ' ') && Func.empty()); // Skip spaces. if (Func.empty()) continue; // Note Func includes the symbol table visibility if any. std::string FuncRest; if (std::getline(SS, FuncRest)) // Read the rest line if any. Func += std::string(" ") + FuncRest; if (Func.back() == '\n') Func.pop_back(); push_back({NStart, NStart + NExtent, Func}); } if (Line) free(Line); #ifdef _WIN32 _pclose(Stream); #else fclose(Stream); wait(NULL); #endif } void reset(Map *M) { clear(); // Fetch both dynamic and static symbols, sort and unique them. fetchSymbols(M); std::sort(begin(), end()); auto NewEnd = std::unique(begin(), end()); erase(NewEnd, end()); } protected: int splitLine(const std::string& line, std::vector& output, char delim = ' ') { std::stringstream ss(line); std::string token; while (std::getline(ss, token, delim)) { output.push_back(token); } return (int)output.size(); } }; class ObjdumpOutput { public: std::string Objdump, BinaryCacheRoot; FILE *Stream; const char *ThisText; uint64_t ThisAddress; uint64_t EndAddress; char *Line; size_t LineLen; ObjdumpOutput(std::string Objdump, std::string BinaryCacheRoot) : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot), Stream(nullptr), Line(NULL), LineLen(0) {} ~ObjdumpOutput() { if (Stream) { #ifdef _WIN32 _pclose(Stream); #else fclose(Stream); wait(NULL); #endif } if (Line) free(Line); } void reset(Map *M, uint64_t Start, uint64_t Stop) { ThisAddress = 0; ThisText = ""; if (Stream) { #ifdef _WIN32 _pclose(Stream); #else fclose(Stream); wait(NULL); #endif } char buf1[32], buf2[32]; sprintf(buf1, "%#" PRIx64, Start); sprintf(buf2, 
"%#" PRIx64, Stop + 4); std::string Cmd = Objdump + " -d --no-show-raw-insn --start-address=" + std::string(buf1) + " --stop-address=" + std::string(buf2) + " " + BinaryCacheRoot + std::string(M->Filename) + #ifdef _WIN32 " 2> NUL"; #else " 2>/dev/null"; #endif Stream = ForkAndExec(Cmd); EndAddress = Stop; }; std::string getText() { return ThisText; } uint64_t next() { getLine(); return ThisAddress; } void getLine() { while (true) { ssize_t Len = getline(&Line, &LineLen, Stream); if (Len == -1) { ThisAddress = EndAddress; ThisText = ""; return; } char *TokBuf; char *One = strtok_r(Line, ":", &TokBuf); char *Two = strtok_r(NULL, "\n", &TokBuf); if (!One || !Two) continue; char *EndPtr = NULL; uint64_t Address = strtoull(One, &EndPtr, 16); if (EndPtr != &One[strlen(One)]) continue; ThisAddress = Address; ThisText = Two; break; } } }; //===----------------------------------------------------------------------===// // PerfReader //===----------------------------------------------------------------------===// class PerfReader { public: PerfReader(const std::string &Filename, std::string Objdump, std::string BinaryCacheRoot); ~PerfReader(); void readHeader(); void readAttrs(); void readEventDesc(); void readDataStream(); + void registerNewMapping(unsigned char *Buf, const char *FileName); unsigned char *readEvent(unsigned char *); perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout); void emitLine(uint64_t PC, std::map *Counters, const std::string &Text); void emitFunctionStart(std::string &Name); void emitFunctionEnd(std::string &Name, std::map &Counters); void emitTopLevelCounters(); void emitMaps(); void emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, std::map &SymEvents, uint64_t Adjust); PyObject *complete(); private: unsigned char *Buffer; #ifdef _WIN32 HANDLE hFile; HANDLE hMapFile; #else size_t BufferLen; #endif perf_header *Header; std::map EventIDs; std::map EventLayouts; std::map>> Events; std::map TotalEvents; std::map> 
TotalEventsPerMap; std::vector Maps; std::map> CurrentMaps; PyObject *Functions, *TopLevelCounters; std::vector Lines; std::string Objdump, BinaryCacheRoot; }; PerfReader::PerfReader(const std::string &Filename, std::string Objdump, std::string BinaryCacheRoot) : Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) { TopLevelCounters = PyDict_New(); Functions = PyDict_New(); #ifdef _WIN32 Buffer = nullptr; hMapFile = nullptr; hFile = ::CreateFileA(Filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); assert(hFile != INVALID_HANDLE_VALUE); LARGE_INTEGER size; size.QuadPart = 0; assert(::GetFileSizeEx(hFile, &size) != FALSE); hMapFile = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, size.HighPart, size.LowPart, NULL); assert(hMapFile != nullptr); Buffer = (unsigned char*)::MapViewOfFile(hMapFile, FILE_MAP_READ, 0, 0, 0); assert(Buffer != nullptr); #else int fd = open(Filename.c_str(), O_RDONLY); assert(fd > 0); struct stat sb; assert(fstat(fd, &sb) == 0); BufferLen = (size_t)sb.st_size; Buffer = (unsigned char *)mmap(NULL, BufferLen, PROT_READ, MAP_SHARED, fd, 0); assert(Buffer != MAP_FAILED); #endif } PerfReader::~PerfReader() { #ifdef _WIN32 if (hMapFile != NULL) { if (Buffer != NULL) ::UnmapViewOfFile(Buffer); ::CloseHandle(hMapFile); } if (hFile != INVALID_HANDLE_VALUE) ::CloseHandle(hFile); #else munmap(Buffer, BufferLen); #endif } void PerfReader::readHeader() { Header = (perf_header *)&Buffer[0]; assert(!strncmp(Header->magic, "PERFILE2", 8)); } void PerfReader::readDataStream() { unsigned char *Buf = &Buffer[Header->data.offset]; unsigned char *End = Buf + Header->data.size; while (Buf < End) Buf = readEvent(Buf); } #define HEADER_EVENT_DESC 12 void PerfReader::readAttrs() { if (Header->flags & (1U << HEADER_EVENT_DESC)) { readEventDesc(); } else { uint64_t NumEvents = Header->attrs.size / Header->attr_size; for (unsigned I = 0; I < NumEvents; ++I) { const perf_event_attr* attr = (const 
perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size]; const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size); unsigned char* Buf = &Buffer[ids->offset]; uint64_t NumIDs = ids->size / sizeof(uint64_t); const char* Str = "unknown"; switch (attr->type) { case PERF_TYPE_HARDWARE: if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config]; break; case PERF_TYPE_SOFTWARE: if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config]; break; } // Weirdness of perf: if there is only one event descriptor, that // event descriptor can be referred to by ANY id! - if (NumEvents == 1 && NumIDs == 0) { + if (NumEvents == 1) { EventIDs[0] = Str; EventLayouts[0] = attr->sample_type; } for (unsigned J = 0; J < NumIDs; ++J) { auto id = TakeU64(Buf); EventIDs[id] = Str; EventLayouts[id] = attr->sample_type; } } } } void PerfReader::readEventDesc() { perf_file_section *P = (perf_file_section *)&Buffer[Header->data.offset + Header->data.size]; for (int I = 0; I < HEADER_EVENT_DESC; ++I) if (Header->flags & (1ULL << I)) ++P; unsigned char *Buf = &Buffer[P->offset]; uint32_t NumEvents = TakeU32(Buf); uint32_t AttrSize = TakeU32(Buf); for (unsigned I = 0; I < NumEvents; ++I) { // Parse out the "config" bitmask for the struct layout. auto *Buf2 = Buf; (void)TakeU32(Buf2); (void)TakeU32(Buf2); (void)TakeU64(Buf2); (void)TakeU64(Buf2); auto Layout = TakeU64(Buf2); Buf += AttrSize; uint32_t NumIDs = TakeU32(Buf); uint32_t StrLen = TakeU32(Buf); const char *Str = (const char *)Buf; Buf += StrLen; // Weirdness of perf: if there is only one event descriptor, that // event descriptor can be referred to by ANY id! 
- if (NumEvents == 1 && NumIDs == 0) { + if (NumEvents == 1) { EventIDs[0] = Str; EventLayouts[0] = Layout; } for (unsigned J = 0; J < NumIDs; ++J) { auto L = TakeU64(Buf); EventIDs[L] = Str; EventLayouts[L] = Layout; } } } +static uint64_t getTimeFromSampleId(unsigned char *EndOfStruct, + uint64_t Layout) { + uint64_t *Ptr = (uint64_t *)EndOfStruct; + // Each of the PERF_SAMPLE_* bits tested below adds an 8-byte field. + if (Layout & PERF_SAMPLE_IDENTIFIER) + --Ptr; + if (Layout & PERF_SAMPLE_CPU) + --Ptr; + if (Layout & PERF_SAMPLE_STREAM_ID) + --Ptr; + if (Layout & PERF_SAMPLE_ID) + --Ptr; + assert(Layout & PERF_SAMPLE_TIME); + --Ptr; + return *Ptr; +} + +void PerfReader::registerNewMapping(unsigned char *Buf, const char *Filename) { + perf_event_mmap_common *E = (perf_event_mmap_common *)Buf; + auto MapID = Maps.size(); + // EXEC ELF objects aren't relocated. DYN ones are, + // so if it's a DYN object adjust by subtracting the + // map base. + bool IsSO = IsSharedObject(BinaryCacheRoot + std::string(Filename)); + uint64_t End = E->start + E->extent; + uint64_t Adjust = IsSO ? E->start - E->pgoff : 0; + Maps.push_back({E->start, End, Adjust, Filename}); + + unsigned char *EndOfEvent = Buf + E->header.size; + // FIXME: The first EventID is used for every event. + // FIXME: The code assumes perf_event_attr.sample_id_all is set. + uint64_t Time = getTimeFromSampleId(EndOfEvent, EventLayouts.begin()->second); + auto &CurrentMap = CurrentMaps[Time]; + CurrentMap.insert({E->start, MapID}); +} + unsigned char *PerfReader::readEvent(unsigned char *Buf) { perf_event_header *E = (perf_event_header *)Buf; switch (E->type) { case PERF_RECORD_MMAP: { perf_event_mmap *E = (perf_event_mmap *)Buf; - auto MapID = Maps.size(); - // EXEC ELF objects aren't relocated. DYN ones are, - // so if it's a DYN object adjust by subtracting the - // map base. - bool IsSO = IsSharedObject(BinaryCacheRoot + std::string(E->filename)); - Maps.push_back({E->start, E->start + E->extent, - IsSO ? 
E->start - E->pgoff : 0, E->filename}); - - // FIXME: use EventLayouts.begin()->second! - perf_sample_id *ID = - (perf_sample_id *)(Buf + E->header.size - sizeof(perf_sample_id)); - auto &CurrentMap = CurrentMaps[ID->time]; - CurrentMap.insert({E->start, MapID}); + registerNewMapping(Buf, E->filename); } break; case PERF_RECORD_MMAP2: { perf_event_mmap2 *E = (perf_event_mmap2 *)Buf; if (!(E->prot & PROT_EXEC)) break; - auto MapID = Maps.size(); - // EXEC ELF objects aren't relocated. DYN ones are, - // so if it's a DYN object adjust by subtracting the - // map base. - bool IsSO = IsSharedObject(BinaryCacheRoot + std::string(E->filename)); - Maps.push_back({E->start, E->start + E->extent, - IsSO ? E->start - E->pgoff : 0, E->filename}); - - // FIXME: use EventLayouts.begin()->second! - perf_sample_id *ID = - (perf_sample_id *)(Buf + E->header.size - sizeof(perf_sample_id)); - auto &CurrentMap = CurrentMaps[ID->time]; - CurrentMap.insert({E->start, MapID}); + registerNewMapping(Buf, E->filename); } break; case PERF_RECORD_SAMPLE: { perf_event_sample* E = (perf_event_sample*)Buf; auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), EventLayouts.begin()->second); auto EventID = NewE.id; auto PC = NewE.ip; // Search for the map corresponding to this sample. Search backwards through // time, discarding any maps created after our timestamp. 
uint64_t MapID = ~0ULL; for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); I != E; ++I) { if (I->first > NewE.time) continue; auto NewI = I->second.upper_bound(PC); if (NewI == I->second.begin()) continue; --NewI; if (NewI->first > PC) continue; MapID = NewI->second; break; } if (MapID != ~0ULL) { assert(EventIDs.count(EventID)); Events[MapID][PC][EventIDs[EventID]] += NewE.period; TotalEvents[EventIDs[EventID]] += NewE.period; TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; } } break; } return &Buf[E->size]; } perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) { perf_event_sample E; memset((char*)&E, 0, sizeof(E)); assert(Layout & PERF_SAMPLE_IP); assert(Layout & PERF_SAMPLE_PERIOD); if (Layout & PERF_SAMPLE_IDENTIFIER) E.id = TakeU64(Buf); if (Layout & PERF_SAMPLE_IP) E.ip = TakeU64(Buf); if (Layout & PERF_SAMPLE_TID) { E.pid = TakeU32(Buf); E.tid = TakeU32(Buf); } if (Layout & PERF_SAMPLE_TIME) E.time = TakeU64(Buf); if (Layout & PERF_SAMPLE_ADDR) (void) TakeU64(Buf); if (Layout & PERF_SAMPLE_ID) E.id = TakeU64(Buf); if (Layout & PERF_SAMPLE_STREAM_ID) (void) TakeU64(Buf); if (Layout & PERF_SAMPLE_CPU) (void) TakeU64(Buf); if (Layout & PERF_SAMPLE_PERIOD) E.period = TakeU64(Buf); return E; } void PerfReader::emitFunctionStart(std::string &Name) { Lines.clear(); } void PerfReader::emitFunctionEnd(std::string &Name, std::map &Counters) { auto *CounterDict = PyDict_New(); for (auto &KV : Counters) PyDict_SetItemString(CounterDict, KV.first, PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); auto *LinesList = PyList_New(Lines.size()); unsigned Idx = 0; for (auto *I : Lines) PyList_SetItem(LinesList, Idx++, I); auto *FnDict = PyDict_New(); PyDict_SetItemString(FnDict, "counters", CounterDict); PyDict_SetItemString(FnDict, "data", LinesList); PyDict_SetItemString(Functions, Name.c_str(), FnDict); } void PerfReader::emitLine(uint64_t PC, std::map *Counters, const std::string &Text) { auto *CounterDict = 
PyDict_New(); if (Counters) for (auto &KV : *Counters) PyDict_SetItemString(CounterDict, KV.first, PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); auto *Line = Py_BuildValue("[NKs]", CounterDict, (unsigned long long) PC, (char*) Text.c_str()); Lines.push_back(Line); } void PerfReader::emitTopLevelCounters() { for (auto &KV : TotalEvents) PyDict_SetItemString(TopLevelCounters, KV.first, PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); } void PerfReader::emitMaps() { for (auto &KV : Events) { auto MapID = KV.first; auto &MapEvents = KV.second; if (MapEvents.empty()) continue; if (MapID > Maps.size()) { // Something went badly wrong. Try and recover. continue; } // Are there enough events here to bother with? bool AllUnderThreshold = true; for (auto &I : TotalEventsPerMap[MapID]) { auto Total = TotalEvents[I.first]; // If a map contains more than 1% of some event, bother with it. if ((float)I.second / (float)Total > 0.01f) { AllUnderThreshold = false; break; } } if (AllUnderThreshold) continue; uint64_t Adjust = Maps[MapID].Adjust; SymTabOutput Syms(Objdump, BinaryCacheRoot); Syms.reset(&Maps[MapID]); // Accumulate the event totals for each symbol auto Sym = Syms.begin(); auto Event = MapEvents.begin(); std::map> SymToEventTotals; while (Event != MapEvents.end() && Sym != Syms.end()) { // Skip events until we find one after the start of Sym auto PC = Event->first - Adjust; if (PC < Sym->Start) { ++Event; continue; } // Skip symbols until the event is before the end of Sym if (PC >= Sym->End) { ++Sym; continue; } // We now know that Event lies within Sym, so add it to the totals for (auto &KV : Event->second) SymToEventTotals[Sym->Start][KV.first] += KV.second; ++Event; } // Emit only symbols that took up > 0.5% of any counter for (auto &Sym : Syms) { bool Keep = false; for (auto &KV : SymToEventTotals[Sym.Start]) { if ((double)KV.second / (double)TotalEvents[KV.first] > 0.005) { Keep = true; break; } } if (Keep) emitSymbol(Sym, Maps[MapID], 
MapEvents.lower_bound(Sym.Start), SymToEventTotals[Sym.Start], Adjust); } } } void PerfReader::emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, std::map &SymEvents, uint64_t Adjust) { ObjdumpOutput Dump(Objdump, BinaryCacheRoot); Dump.reset(&M, Sym.Start, Sym.End); emitFunctionStart(Sym.Name); for (uint64_t I = Dump.next(); I < Sym.End; I = Dump.next()) { auto PC = Event->first - Adjust; auto Text = Dump.getText(); if (PC == I) { emitLine(I, &Event->second, Text); ++Event; } else { emitLine(I, nullptr, Text); } } emitFunctionEnd(Sym.Name, SymEvents); } PyObject *PerfReader::complete() { auto *Obj = PyDict_New(); PyDict_SetItemString(Obj, "counters", TopLevelCounters); PyDict_SetItemString(Obj, "functions", Functions); return Obj; } #ifndef STANDALONE static PyObject *cPerf_importPerf(PyObject *self, PyObject *args) { const char *Fname; const char *Objdump = "objdump"; const char *BinaryCacheRoot = ""; if (!PyArg_ParseTuple(args, "s|ss", &Fname, &Objdump, &BinaryCacheRoot)) return NULL; try { PerfReader P(Fname, Objdump, BinaryCacheRoot); P.readHeader(); P.readAttrs(); P.readDataStream(); P.emitTopLevelCounters(); P.emitMaps(); return P.complete(); } catch (std::logic_error &E) { PyErr_SetString(PyExc_AssertionError, E.what()); return NULL; } catch (std::runtime_error &E) { PyErr_SetString(PyExc_RuntimeError, E.what()); return NULL; } catch (...) 
{ PyErr_SetString(PyExc_RuntimeError, "Unknown error"); return NULL; } } static PyMethodDef cPerfMethods[] = {{"importPerf", cPerf_importPerf, METH_VARARGS, "Import perf.data from a filename"}, {NULL, NULL, 0, NULL}}; #if PY_MAJOR_VERSION >= 3 static PyModuleDef cPerfModuleDef = {PyModuleDef_HEAD_INIT, "cPerf", nullptr, -1, cPerfMethods, nullptr, nullptr, nullptr, nullptr}; #endif #if PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit_cPerf(void) { return PyModule_Create(&cPerfModuleDef); } #else PyMODINIT_FUNC initcPerf(void) { (void)Py_InitModule("cPerf", cPerfMethods); } #endif #else // STANDALONE // You can build the standalone version for a debug purpose using // clang++ cPerf.cpp -o /tmp/cPerf -DSTANDALONE $(pkg-config --cflags --libs python3-embed) // getenv() is permitted to overwrite its return value on subsequent calls, // so copy it to std::string early. static std::string getEnvVar(const char *VarName, const char *DefaultValue) { const char *Value = getenv(VarName); if (!Value) Value = DefaultValue; return Value; } int main(int argc, char **argv) { Py_Initialize(); if (argc < 2) return -1; std::string BinaryCacheRoot = getEnvVar("LNT_BINARY_CACHE_ROOT", ""); std::string Objdump = getEnvVar("CMAKE_OBJDUMP", "objdump"); PerfReader P(argv[1], Objdump, BinaryCacheRoot); P.readHeader(); P.readAttrs(); P.readDataStream(); P.emitTopLevelCounters(); P.emitMaps(); PyObject_Print(P.complete(), stdout, Py_PRINT_RAW); fputs("\n", stdout); // Usually expected by UNIX shells Py_FinalizeEx(); return 0; } #endif