diff --git a/lnt/testing/profile/cPerf.cpp b/lnt/testing/profile/cPerf.cpp --- a/lnt/testing/profile/cPerf.cpp +++ b/lnt/testing/profile/cPerf.cpp @@ -57,9 +57,14 @@ // //===----------------------------------------------------------------------===// -#ifndef STANDALONE -#include +#ifdef _WIN32 +#define _CRT_SECURE_NO_WARNINGS +#define strtok_r strtok_s +#include +#include +typedef SSIZE_T ssize_t; #endif +#include #include #include #include @@ -71,10 +76,12 @@ #include #include #include +#ifndef _WIN32 #include -#include #include #include +#endif +#include #include //===----------------------------------------------------------------------===// @@ -108,6 +115,10 @@ // Forks, execs Cmd under a shell and returns // a file descriptor reading the command's stdout. FILE *ForkAndExec(std::string Cmd) { +#ifdef _WIN32 + Cmd = "cmd.exe /c " + Cmd; + FILE *Stream = _popen(Cmd.c_str(), "rt"); +#else int P[2]; pipe(P); @@ -120,10 +131,41 @@ } else { close(P[1]); } - +#endif return Stream; } +#ifdef _WIN32 +size_t getline(char** lineptr, size_t* n, FILE* stream) { + if (lineptr == nullptr || stream == nullptr || n == nullptr) return -1; + char* bufptr = *lineptr; + char* p = nullptr; + size_t size = *n; + int c = fgetc(stream); + if (c == EOF) return -1; + if (bufptr == nullptr) { + bufptr = (char*)malloc(128); + if (bufptr == nullptr) return -1; + size = 128; + } + p = bufptr; + while (c != EOF) { + if ((unsigned)(p - bufptr) > (size - 1)) { + size = size + 128; + bufptr = (char*)realloc(bufptr, size); + if (bufptr == nullptr) return -1; + } + *p++ = c; + if (c == '\n') break; + c = fgetc(stream); + } + *p++ = '\0'; + *lineptr = bufptr; + *n = size; + return p - bufptr - 1; +} +#endif + void Assert(bool Expr, const char *ExprStr, const char *File, int Line) { if (Expr) return; @@ -132,30 +174,6 @@ throw std::logic_error(Str); } -// Returns true if the ELF file given by filename -// is a shared object (DYN). -bool IsSharedObject(std::string Fname) { - // We replicate the first part of an ELF header here - // so as not to rely on . - struct PartialElfHeader { - unsigned char e_ident[16]; - uint16_t e_type; - }; - const int ET_DYN = 3; - - FILE *stream = fopen(Fname.c_str(), "r"); - if (stream == NULL) - return false; - - PartialElfHeader H; - auto NumRead = fread(&H, 1, sizeof(H), stream); - assert(NumRead == sizeof(H)); - - fclose(stream); - - return H.e_type == ET_DYN; -} - //===----------------------------------------------------------------------===// // Perf structures. Taken from https://lwn.net/Articles/644919/ //===----------------------------------------------------------------------===// @@ -190,6 +208,21 @@ uint64_t flags1[3]; }; +struct perf_event_attr { + uint32_t type; + uint32_t size; + uint64_t config; + uint64_t sample_period; + uint64_t sample_type; + uint64_t read_format; + uint64_t flags; + uint32_t wakeup_events; + uint32_t bp_type; + uint64_t bp_addr; + uint64_t bp_len; + uint64_t branch_sample_type; +}; + struct perf_event_header { uint32_t type; uint16_t misc; @@ -237,12 +270,74 @@ uint64_t id; }; +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, + PERF_TYPE_MAX +}; + +enum perf_hw_id { + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, + PERF_COUNT_HW_MAX +}; + +static const char* hw_event_names[PERF_COUNT_HW_MAX] = { + "cycles", + "instructions", + "cache-references", + "cache-misses", + "branch-instructions", + "branch-misses", + "bus-cycles", + "stalled-cycles-frontend", + "stalled-cycles-backend", + "ref-cpu-cycles" +}; + +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, + PERF_COUNT_SW_MAX +}; + +static const char* sw_event_names[PERF_COUNT_SW_MAX] = { + "cpu-clock", + "task-clock", + "page-faults", + "context-switches", + "cpu-migrations", + "minor-faults", + "major-faults", + "alignment-faults", + "emulation-faults" +}; + //===----------------------------------------------------------------------===// // Readers for nm and objdump output //===----------------------------------------------------------------------===// struct Map { - uint64_t Start, End; + uint64_t Start, End, PgOff; const char *Filename; }; @@ -259,17 +354,23 @@ class NmOutput : public std::vector { public: - std::string Nm; + std::string BinaryCacheRoot, Nm; - NmOutput(std::string Nm) : Nm(Nm) {} + NmOutput(std::string BinaryCacheRoot, std::string Nm) + : BinaryCacheRoot(BinaryCacheRoot), Nm(Nm) {} void fetchSymbols(Map *M, bool Dynamic) { std::string D = "-D"; if (!Dynamic) // Don't fetch the dynamic symbols - instead fetch static ones. D = ""; - std::string Cmd = Nm + " " + D + " -S --defined-only " + std::string(M->Filename) + + std::string Cmd = Nm + " " + D + " -S --defined-only " + + BinaryCacheRoot + std::string(M->Filename) + +#ifdef _WIN32 + " 2> NUL"; +#else " 2>/dev/null"; +#endif auto Stream = ForkAndExec(Cmd); char *Line = nullptr; @@ -289,7 +390,8 @@ std::string& Four = SplittedLine[3]; char *EndPtr = NULL; - uint64_t Start = strtoull(One.c_str(), &EndPtr, 16); + // Symbols with odd addresses signify functions in THUMB mode. + uint64_t Start = strtoull(One.c_str(), &EndPtr, 16) & ~(uint64_t)1; if (EndPtr == One.c_str()) continue; uint64_t Extent = strtoull(Two.c_str(), &EndPtr, 16); @@ -315,8 +417,12 @@ if (Line) free(Line); +#ifdef _WIN32 + _pclose(Stream); +#else fclose(Stream); wait(NULL); +#endif } void reset(Map *M) { @@ -337,13 +443,13 @@ while (std::getline(ss, token, delim)) { output.push_back(token); } - return output.size(); + return (int)output.size(); } }; class ObjdumpOutput { public: - std::string Objdump; + std::string BinaryCacheRoot, Objdump; FILE *Stream; char *ThisText; uint64_t ThisAddress; @@ -351,12 +457,17 @@ char *Line; size_t LineLen; - ObjdumpOutput(std::string Objdump) - : Objdump(Objdump), Stream(nullptr), Line(NULL), LineLen(0) {} + ObjdumpOutput(std::string BinaryCacheRoot, std::string Objdump) + : BinaryCacheRoot(BinaryCacheRoot), Objdump(Objdump), Stream(nullptr), + Line(NULL), LineLen(0) {} ~ObjdumpOutput() { if (Stream) { +#ifdef _WIN32 + _pclose(Stream); +#else fclose(Stream); wait(NULL); +#endif } if (Line) free(Line); @@ -365,8 +476,12 @@ void reset(Map *M, uint64_t Start, uint64_t Stop) { ThisAddress = 0; if (Stream) { +#ifdef _WIN32 + _pclose(Stream); +#else fclose(Stream); wait(NULL); +#endif } char buf1[32], buf2[32]; @@ -375,8 +490,13 @@ std::string Cmd = Objdump + " -d --no-show-raw-insn --start-address=" + std::string(buf1) + " --stop-address=" + - std::string(buf2) + " " + std::string(M->Filename) + + std::string(buf2) + " " + + BinaryCacheRoot + std::string(M->Filename) + +#ifdef _WIN32 + " 2> NUL"; +#else " 2>/dev/null"; +#endif Stream = ForkAndExec(Cmd); EndAddress = Stop; @@ -419,12 +539,13 @@ class PerfReader { public: - PerfReader(const std::string &Filename, std::string Nm, - std::string Objdump); + PerfReader(const std::string &Filename, std::string BinaryCacheRoot, + std::string Nm, std::string Objdump); ~PerfReader(); void readHeader(); void readAttrs(); + void readEventDesc(); void readDataStream(); unsigned char *readEvent(unsigned char *); perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout); @@ -438,13 +559,17 @@ void emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, - std::map &SymEvents, - uint64_t Adjust); + std::map &SymEvents); PyObject *complete(); private: unsigned char *Buffer; +#ifdef _WIN32 + HANDLE hFile; + HANDLE hMapFile; +#else size_t BufferLen; +#endif perf_header *Header; std::map EventIDs; @@ -458,12 +583,27 @@ PyObject *Functions, *TopLevelCounters; std::vector Lines; - std::string Nm, Objdump; + std::string BinaryCacheRoot, Nm, Objdump; }; -PerfReader::PerfReader(const std::string &Filename, +PerfReader::PerfReader(const std::string &Filename, std::string BinaryCacheRoot, std::string Nm, std::string Objdump) - : Nm(Nm), Objdump(Objdump) { + : BinaryCacheRoot(BinaryCacheRoot), Nm(Nm), Objdump(Objdump) { + TopLevelCounters = PyDict_New(); + Functions = PyDict_New(); +#ifdef _WIN32 + Buffer = nullptr; + hMapFile = nullptr; + hFile = ::CreateFileA(Filename.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + assert(hFile != INVALID_HANDLE_VALUE); + LARGE_INTEGER size; + size.QuadPart = 0; + assert(::GetFileSizeEx(hFile, &size) != FALSE); + hMapFile = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, size.HighPart, size.LowPart, NULL); + assert(hMapFile != nullptr); + Buffer = (unsigned char*)::MapViewOfFile(hMapFile, FILE_MAP_READ, 0, 0, 0); + assert(Buffer != nullptr); +#else int fd = open(Filename.c_str(), O_RDONLY); assert(fd > 0); @@ -473,9 +613,20 @@ Buffer = (unsigned char *)mmap(NULL, BufferLen, PROT_READ, MAP_SHARED, fd, 0); assert(Buffer != MAP_FAILED); +#endif } -PerfReader::~PerfReader() { munmap(Buffer, BufferLen); } +PerfReader::~PerfReader() { +#ifdef _WIN32 + if (hMapFile != NULL) { + if (Buffer != NULL) ::UnmapViewOfFile(Buffer); + ::CloseHandle(hMapFile); + } + if (hFile != INVALID_HANDLE_VALUE) ::CloseHandle(hFile); +#else + munmap(Buffer, BufferLen); +#endif +} void PerfReader::readHeader() { Header = (perf_header *)&Buffer[0]; @@ -490,16 +641,52 @@ Buf = readEvent(Buf); } +#define HEADER_EVENT_DESC 12 + void PerfReader::readAttrs() { - const int HEADER_EVENT_DESC = 12; + if (Header->flags & (1U << HEADER_EVENT_DESC)) { + readEventDesc(); + } else { + uint64_t NumEvents = Header->attrs.size / Header->attr_size; + for (unsigned I = 0; I < NumEvents; ++I) { + const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size]; + const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size); + unsigned char* Buf = &Buffer[ids->offset]; + uint64_t NumIDs = ids->size / sizeof(uint64_t); + + const char* Str = "unknown"; + switch (attr->type) { + case PERF_TYPE_HARDWARE: + if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config]; + break; + case PERF_TYPE_SOFTWARE: + if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config]; + break; + } + + // Weirdness of perf: if there is only one event descriptor, that + // event descriptor can be referred to by ANY id! + if (NumEvents == 1 && NumIDs == 0) { + EventIDs[0] = Str; + EventLayouts[0] = attr->sample_type; + } + + for (unsigned J = 0; J < NumIDs; ++J) { + auto id = TakeU64(Buf); + EventIDs[id] = Str; + EventLayouts[id] = attr->sample_type; + } + } + } +} + +void PerfReader::readEventDesc() { perf_file_section *P = (perf_file_section *)&Buffer[Header->data.offset + Header->data.size]; for (int I = 0; I < HEADER_EVENT_DESC; ++I) - if (Header->flags & (1U << I)) + if (Header->flags & (1ULL << I)) ++P; - assert(Header->flags & (1U << HEADER_EVENT_DESC)); - unsigned char *Buf = &Buffer[P->offset]; uint32_t NumEvents = TakeU32(Buf); uint32_t AttrSize = TakeU32(Buf); @@ -532,12 +719,13 @@ } unsigned char *PerfReader::readEvent(unsigned char *Buf) { - perf_event_sample *E = (perf_event_sample *)Buf; - - if (E->header.type == PERF_RECORD_MMAP) { + perf_event_header *E = (perf_event_header *)Buf; + switch (E->type) { + case PERF_RECORD_MMAP: + { perf_event_mmap *E = (perf_event_mmap *)Buf; auto MapID = Maps.size(); - Maps.push_back({E->start, E->start + E->extent, E->filename}); + Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename}); // FIXME: use EventLayouts.begin()->second! perf_sample_id *ID = @@ -545,10 +733,12 @@ auto &CurrentMap = CurrentMaps[ID->time]; CurrentMap.insert({E->start, MapID}); } - if (E->header.type == PERF_RECORD_MMAP2) { + break; + case PERF_RECORD_MMAP2: + { perf_event_mmap2 *E = (perf_event_mmap2 *)Buf; auto MapID = Maps.size(); - Maps.push_back({E->start, E->start + E->extent, E->filename}); + Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename}); // FIXME: use EventLayouts.begin()->second! perf_sample_id *ID = @@ -556,44 +746,45 @@ auto &CurrentMap = CurrentMaps[ID->time]; CurrentMap.insert({E->start, MapID}); } + break; + case PERF_RECORD_SAMPLE: + { + perf_event_sample* E = (perf_event_sample*)Buf; + auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), + EventLayouts.begin()->second); + auto EventID = NewE.id; + auto RawPC = NewE.ip; + + // Search for the map corresponding to this sample. Search backwards through + // time, discarding any maps created after our timestamp. + uint64_t MapID = ~0ULL; + for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); + I != E; ++I) { + if (I->first > NewE.time) + continue; - if (E->header.type != PERF_RECORD_SAMPLE) - return &Buf[E->header.size]; - - auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), - EventLayouts.begin()->second); - auto EventID = NewE.id; - auto PC = NewE.ip; - - // Search for the map corresponding to this sample. Search backwards through - // time, discarding any maps created after our timestamp. - size_t MapID = ~0UL; - for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); - I != E; ++I) { - if (I->first > NewE.time) - continue; - - auto NewI = I->second.upper_bound(PC); - if (NewI == I->second.begin()) - continue; - --NewI; - - if (NewI->first > PC) - continue; - MapID = NewI->second; - break; - } - if (MapID == ~0UL) - return &Buf[E->header.size]; - assert(MapID != ~0UL); - - assert(EventIDs.count(EventID)); - Events[MapID][PC][EventIDs[EventID]] += NewE.period; + auto NewI = I->second.upper_bound(RawPC); + if (NewI == I->second.begin()) + continue; + --NewI; - TotalEvents[EventIDs[EventID]] += NewE.period; - TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; + if (NewI->first > RawPC) + continue; + MapID = NewI->second; + break; + } + if (MapID != ~0ULL) { + auto DSOPC = RawPC - Maps[MapID].Start + Maps[MapID].PgOff; + assert(EventIDs.count(EventID)); + Events[MapID][DSOPC][EventIDs[EventID]] += NewE.period; - return &Buf[E->header.size]; + TotalEvents[EventIDs[EventID]] += NewE.period; + TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; + } + } + break; + } + return &Buf[E->size]; } perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) { @@ -667,12 +858,9 @@ } void PerfReader::emitTopLevelCounters() { - TopLevelCounters = PyDict_New(); for (auto &KV : TotalEvents) PyDict_SetItemString(TopLevelCounters, KV.first, PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); - - Functions = PyDict_New(); } void PerfReader::emitMaps() { @@ -704,10 +892,8 @@ // EXEC ELF objects aren't relocated. DYN ones are, // so if it's a DYN object adjust by subtracting the // map base. - bool IsSO = IsSharedObject(Maps[MapID].Filename); - uint64_t Adjust = IsSO ? Maps[MapID].Start : 0; - NmOutput Syms(Nm); + NmOutput Syms(BinaryCacheRoot, Nm); Syms.reset(&Maps[MapID]); // Accumulate the event totals for each symbol @@ -716,7 +902,7 @@ std::map> SymToEventTotals; while (Event != MapEvents.end() && Sym != Syms.end()) { // Skip events until we find one after the start of Sym - auto PC = Event->first - Adjust; + auto PC = Event->first; if (PC < Sym->Start) { ++Event; continue; @@ -743,7 +929,7 @@ } if (Keep) emitSymbol(Sym, Maps[MapID], MapEvents.lower_bound(Sym.Start), - SymToEventTotals[Sym.Start], Adjust); + SymToEventTotals[Sym.Start]); } } } @@ -751,15 +937,14 @@ void PerfReader::emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, - std::map &SymEvents, - uint64_t Adjust) { - ObjdumpOutput Dump(Objdump); + std::map &SymEvents) { + ObjdumpOutput Dump(BinaryCacheRoot, Objdump); Dump.reset(&M, Sym.Start, Sym.End); Dump.next(); emitFunctionStart(Sym.Name); for (uint64_t I = Sym.Start; I < Sym.End; I = Dump.next()) { - auto PC = Event->first - Adjust; + auto PC = Event->first; auto Text = Dump.getText(); if (PC == I) { @@ -782,13 +967,14 @@ #ifndef STANDALONE static PyObject *cPerf_importPerf(PyObject *self, PyObject *args) { const char *Fname; + const char *BinaryCacheRoot = ""; const char *Nm = "nm"; const char *Objdump = "objdump"; - if (!PyArg_ParseTuple(args, "s|ss", &Fname, &Nm, &Objdump)) + if (!PyArg_ParseTuple(args, "s|sss", &Fname, &BinaryCacheRoot, &Nm, &Objdump)) return NULL; try { - PerfReader P(Fname, Nm, Objdump); + PerfReader P(Fname, BinaryCacheRoot, Nm, Objdump); P.readHeader(); P.readAttrs(); P.readDataStream(); @@ -837,13 +1023,17 @@ #else // STANDALONE int main(int argc, char **argv) { - PerfReader P(argv[1], std::cout); + Py_Initialize(); + if (argc < 2) return -1; + PerfReader P(argv[1], "nm", "objdump"); P.readHeader(); P.readAttrs(); P.readDataStream(); P.emitTopLevelCounters(); P.emitMaps(); - P.complete(); + PyObject_Print(P.complete(), stdout, Py_PRINT_RAW); + Py_FinalizeEx(); + return 0; } #endif diff --git a/lnt/testing/profile/perf.py b/lnt/testing/profile/perf.py --- a/lnt/testing/profile/perf.py +++ b/lnt/testing/profile/perf.py @@ -22,7 +22,8 @@ return f.read(8) == b'PERFILE2' @staticmethod - def deserialize(f, nm='nm', objdump='objdump', propagateExceptions=False): + def deserialize(f, binaryCacheRoot='', nm='nm', objdump='objdump', + propagateExceptions=False): f = f.name if os.path.getsize(f) == 0: diff --git a/lnt/testing/profile/profile.py b/lnt/testing/profile/profile.py --- a/lnt/testing/profile/profile.py +++ b/lnt/testing/profile/profile.py @@ -27,7 +27,15 @@ """ for impl in lnt.testing.profile.IMPLEMENTATIONS.values(): if impl.checkFile(f): - ret = impl.deserialize(open(f, 'rb')) + ret = None + with open(f, 'rb') as fd: + if impl is lnt.testing.profile.perf.LinuxPerfProfile: + ret = impl.deserialize(fd, + binaryCacheRoot = os.getenv('LNT_BINARY_CACHE_ROOT', ''), + nm = os.getenv('CMAKE_NM', 'nm'), + objdump = os.getenv('CMAKE_OBJDUMP', 'objdump')) + else: + ret = impl.deserialize(fd) if ret: return Profile(ret) else: