diff --git a/lnt/testing/profile/cPerf.cpp b/lnt/testing/profile/cPerf.cpp --- a/lnt/testing/profile/cPerf.cpp +++ b/lnt/testing/profile/cPerf.cpp @@ -57,9 +57,7 @@ // //===----------------------------------------------------------------------===// -#ifndef STANDALONE #include -#endif #include #include #include @@ -120,7 +118,6 @@ } else { close(P[1]); } - return Stream; } @@ -132,30 +129,6 @@ throw std::logic_error(Str); } -// Returns true if the ELF file given by filename -// is a shared object (DYN). -bool IsSharedObject(std::string Fname) { - // We replicate the first part of an ELF header here - // so as not to rely on . - struct PartialElfHeader { - unsigned char e_ident[16]; - uint16_t e_type; - }; - const int ET_DYN = 3; - - FILE *stream = fopen(Fname.c_str(), "r"); - if (stream == NULL) - return false; - - PartialElfHeader H; - auto NumRead = fread(&H, 1, sizeof(H), stream); - assert(NumRead == sizeof(H)); - - fclose(stream); - - return H.e_type == ET_DYN; -} - //===----------------------------------------------------------------------===// // Perf structures. Taken from https://lwn.net/Articles/644919/ //===----------------------------------------------------------------------===// @@ -190,6 +163,21 @@ uint64_t flags1[3]; }; +struct perf_event_attr { + uint32_t type; + uint32_t size; + uint64_t config; + uint64_t sample_period; + uint64_t sample_type; + uint64_t read_format; + uint64_t flags; + uint32_t wakeup_events; + uint32_t bp_type; + uint64_t bp_addr; + uint64_t bp_len; + uint64_t branch_sample_type; +}; + struct perf_event_header { uint32_t type; uint16_t misc; @@ -237,12 +225,74 @@ uint64_t id; }; +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, + PERF_TYPE_MAX +}; + +enum perf_hw_id { + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, + PERF_COUNT_HW_MAX +}; + +static const char* hw_event_names[PERF_COUNT_HW_MAX] = { + "cycles", + "instructions", + "cache-references", + "cache-misses", + "branch-instructions", + "branch-misses", + "bus-cycles", + "stalled-cycles-frontend", + "stalled-cycles-backend", + "ref-cpu-cycles" +}; + +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, + PERF_COUNT_SW_MAX +}; + +static const char* sw_event_names[PERF_COUNT_SW_MAX] = { + "cpu-clock", + "task-clock", + "page-faults", + "context-switches", + "cpu-migrations", + "minor-faults", + "major-faults", + "alignment-faults", + "emulation-faults" +}; + //===----------------------------------------------------------------------===// // Readers for nm and objdump output //===----------------------------------------------------------------------===// struct Map { - uint64_t Start, End; + uint64_t Start, End, PgOff; const char *Filename; }; @@ -259,16 +309,18 @@ class NmOutput : public std::vector { public: - std::string Nm; + std::string BinaryCacheRoot, Nm; - NmOutput(std::string Nm) : Nm(Nm) {} + NmOutput(std::string BinaryCacheRoot, std::string Nm) + : BinaryCacheRoot(BinaryCacheRoot), Nm(Nm) {} void fetchSymbols(Map *M, bool Dynamic) { std::string D = "-D"; if (!Dynamic) // Don't fetch the dynamic symbols - instead fetch static ones. D = ""; - std::string Cmd = Nm + " " + D + " -S --defined-only " + std::string(M->Filename) + + std::string Cmd = Nm + " " + D + " -S --defined-only " + + BinaryCacheRoot + std::string(M->Filename) + " 2>/dev/null"; auto Stream = ForkAndExec(Cmd); @@ -289,7 +341,8 @@ std::string& Four = SplittedLine[3]; char *EndPtr = NULL; - uint64_t Start = strtoull(One.c_str(), &EndPtr, 16); + // Symbols with odd addresses signify functions in THUMB mode. + uint64_t Start = strtoull(One.c_str(), &EndPtr, 16) & ~(uint64_t)1; if (EndPtr == One.c_str()) continue; uint64_t Extent = strtoull(Two.c_str(), &EndPtr, 16); @@ -337,13 +390,13 @@ while (std::getline(ss, token, delim)) { output.push_back(token); } - return output.size(); + return (int)output.size(); } }; class ObjdumpOutput { public: - std::string Objdump; + std::string BinaryCacheRoot, Objdump; FILE *Stream; char *ThisText; uint64_t ThisAddress; @@ -351,8 +404,9 @@ char *Line; size_t LineLen; - ObjdumpOutput(std::string Objdump) - : Objdump(Objdump), Stream(nullptr), Line(NULL), LineLen(0) {} + ObjdumpOutput(std::string BinaryCacheRoot, std::string Objdump) + : BinaryCacheRoot(BinaryCacheRoot), Objdump(Objdump), Stream(nullptr), + Line(NULL), LineLen(0) {} ~ObjdumpOutput() { if (Stream) { fclose(Stream); @@ -375,7 +429,8 @@ std::string Cmd = Objdump + " -d --no-show-raw-insn --start-address=" + std::string(buf1) + " --stop-address=" + - std::string(buf2) + " " + std::string(M->Filename) + + std::string(buf2) + " " + + BinaryCacheRoot + std::string(M->Filename) + " 2>/dev/null"; Stream = ForkAndExec(Cmd); @@ -419,12 +474,13 @@ class PerfReader { public: - PerfReader(const std::string &Filename, std::string Nm, - std::string Objdump); + PerfReader(const std::string &Filename, std::string BinaryCacheRoot, + std::string Nm, std::string Objdump); ~PerfReader(); void readHeader(); void readAttrs(); + void readEventDesc(); void readDataStream(); unsigned char *readEvent(unsigned char *); perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout); @@ -438,8 +494,7 @@ void emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, - std::map &SymEvents, - uint64_t Adjust); + std::map &SymEvents); PyObject *complete(); private: @@ -458,12 +513,14 @@ PyObject *Functions, *TopLevelCounters; std::vector Lines; - std::string Nm, Objdump; + std::string BinaryCacheRoot, Nm, Objdump; }; -PerfReader::PerfReader(const std::string &Filename, +PerfReader::PerfReader(const std::string &Filename, std::string BinaryCacheRoot, std::string Nm, std::string Objdump) - : Nm(Nm), Objdump(Objdump) { + : BinaryCacheRoot(BinaryCacheRoot), Nm(Nm), Objdump(Objdump) { + TopLevelCounters = PyDict_New(); + Functions = PyDict_New(); int fd = open(Filename.c_str(), O_RDONLY); assert(fd > 0); @@ -475,7 +532,9 @@ assert(Buffer != MAP_FAILED); } -PerfReader::~PerfReader() { munmap(Buffer, BufferLen); } +PerfReader::~PerfReader() { + munmap(Buffer, BufferLen); +} void PerfReader::readHeader() { Header = (perf_header *)&Buffer[0]; @@ -490,16 +549,52 @@ Buf = readEvent(Buf); } +#define HEADER_EVENT_DESC 12 + void PerfReader::readAttrs() { - const int HEADER_EVENT_DESC = 12; + if (Header->flags & (1U << HEADER_EVENT_DESC)) { + readEventDesc(); + } else { + uint64_t NumEvents = Header->attrs.size / Header->attr_size; + for (unsigned I = 0; I < NumEvents; ++I) { + const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size]; + const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size); + unsigned char* Buf = &Buffer[ids->offset]; + uint64_t NumIDs = ids->size / sizeof(uint64_t); + + const char* Str = "unknown"; + switch (attr->type) { + case PERF_TYPE_HARDWARE: + if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config]; + break; + case PERF_TYPE_SOFTWARE: + if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config]; + break; + } + + // Weirdness of perf: if there is only one event descriptor, that + // event descriptor can be referred to by ANY id! + if (NumEvents == 1 && NumIDs == 0) { + EventIDs[0] = Str; + EventLayouts[0] = attr->sample_type; + } + + for (unsigned J = 0; J < NumIDs; ++J) { + auto id = TakeU64(Buf); + EventIDs[id] = Str; + EventLayouts[id] = attr->sample_type; + } + } + } +} + +void PerfReader::readEventDesc() { perf_file_section *P = (perf_file_section *)&Buffer[Header->data.offset + Header->data.size]; for (int I = 0; I < HEADER_EVENT_DESC; ++I) - if (Header->flags & (1U << I)) + if (Header->flags & (1ULL << I)) ++P; - assert(Header->flags & (1U << HEADER_EVENT_DESC)); - unsigned char *Buf = &Buffer[P->offset]; uint32_t NumEvents = TakeU32(Buf); uint32_t AttrSize = TakeU32(Buf); @@ -532,12 +627,13 @@ } unsigned char *PerfReader::readEvent(unsigned char *Buf) { - perf_event_sample *E = (perf_event_sample *)Buf; - - if (E->header.type == PERF_RECORD_MMAP) { + perf_event_header *E = (perf_event_header *)Buf; + switch (E->type) { + case PERF_RECORD_MMAP: + { perf_event_mmap *E = (perf_event_mmap *)Buf; auto MapID = Maps.size(); - Maps.push_back({E->start, E->start + E->extent, E->filename}); + Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename}); // FIXME: use EventLayouts.begin()->second! perf_sample_id *ID = @@ -545,10 +641,12 @@ auto &CurrentMap = CurrentMaps[ID->time]; CurrentMap.insert({E->start, MapID}); } - if (E->header.type == PERF_RECORD_MMAP2) { + break; + case PERF_RECORD_MMAP2: + { perf_event_mmap2 *E = (perf_event_mmap2 *)Buf; auto MapID = Maps.size(); - Maps.push_back({E->start, E->start + E->extent, E->filename}); + Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename}); // FIXME: use EventLayouts.begin()->second! perf_sample_id *ID = @@ -556,44 +654,45 @@ auto &CurrentMap = CurrentMaps[ID->time]; CurrentMap.insert({E->start, MapID}); } + break; + case PERF_RECORD_SAMPLE: + { + perf_event_sample* E = (perf_event_sample*)Buf; + auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), + EventLayouts.begin()->second); + auto EventID = NewE.id; + auto RawPC = NewE.ip; + + // Search for the map corresponding to this sample. Search backwards through + // time, discarding any maps created after our timestamp. + uint64_t MapID = ~0ULL; + for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); + I != E; ++I) { + if (I->first > NewE.time) + continue; - if (E->header.type != PERF_RECORD_SAMPLE) - return &Buf[E->header.size]; - - auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header), - EventLayouts.begin()->second); - auto EventID = NewE.id; - auto PC = NewE.ip; - - // Search for the map corresponding to this sample. Search backwards through - // time, discarding any maps created after our timestamp. - size_t MapID = ~0UL; - for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend(); - I != E; ++I) { - if (I->first > NewE.time) - continue; - - auto NewI = I->second.upper_bound(PC); - if (NewI == I->second.begin()) - continue; - --NewI; - - if (NewI->first > PC) - continue; - MapID = NewI->second; - break; - } - if (MapID == ~0UL) - return &Buf[E->header.size]; - assert(MapID != ~0UL); - - assert(EventIDs.count(EventID)); - Events[MapID][PC][EventIDs[EventID]] += NewE.period; + auto NewI = I->second.upper_bound(RawPC); + if (NewI == I->second.begin()) + continue; + --NewI; - TotalEvents[EventIDs[EventID]] += NewE.period; - TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; + if (NewI->first > RawPC) + continue; + MapID = NewI->second; + break; + } + if (MapID != ~0ULL) { + auto DSOPC = RawPC - Maps[MapID].Start + Maps[MapID].PgOff; + assert(EventIDs.count(EventID)); + Events[MapID][DSOPC][EventIDs[EventID]] += NewE.period; - return &Buf[E->header.size]; + TotalEvents[EventIDs[EventID]] += NewE.period; + TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period; + } + } + break; + } + return &Buf[E->size]; } perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) { @@ -667,12 +766,9 @@ } void PerfReader::emitTopLevelCounters() { - TopLevelCounters = PyDict_New(); for (auto &KV : TotalEvents) PyDict_SetItemString(TopLevelCounters, KV.first, PyLong_FromUnsignedLongLong((unsigned long long)KV.second)); - - Functions = PyDict_New(); } void PerfReader::emitMaps() { @@ -704,10 +800,8 @@ // EXEC ELF objects aren't relocated. DYN ones are, // so if it's a DYN object adjust by subtracting the // map base. - bool IsSO = IsSharedObject(Maps[MapID].Filename); - uint64_t Adjust = IsSO ? Maps[MapID].Start : 0; - NmOutput Syms(Nm); + NmOutput Syms(BinaryCacheRoot, Nm); Syms.reset(&Maps[MapID]); // Accumulate the event totals for each symbol @@ -716,7 +810,7 @@ std::map> SymToEventTotals; while (Event != MapEvents.end() && Sym != Syms.end()) { // Skip events until we find one after the start of Sym - auto PC = Event->first - Adjust; + auto PC = Event->first; if (PC < Sym->Start) { ++Event; continue; @@ -743,7 +837,7 @@ } if (Keep) emitSymbol(Sym, Maps[MapID], MapEvents.lower_bound(Sym.Start), - SymToEventTotals[Sym.Start], Adjust); + SymToEventTotals[Sym.Start]); } } } @@ -751,15 +845,14 @@ void PerfReader::emitSymbol( Symbol &Sym, Map &M, std::map>::iterator Event, - std::map &SymEvents, - uint64_t Adjust) { - ObjdumpOutput Dump(Objdump); + std::map &SymEvents) { + ObjdumpOutput Dump(BinaryCacheRoot, Objdump); Dump.reset(&M, Sym.Start, Sym.End); Dump.next(); emitFunctionStart(Sym.Name); for (uint64_t I = Sym.Start; I < Sym.End; I = Dump.next()) { - auto PC = Event->first - Adjust; + auto PC = Event->first; auto Text = Dump.getText(); if (PC == I) { @@ -782,13 +875,14 @@ #ifndef STANDALONE static PyObject *cPerf_importPerf(PyObject *self, PyObject *args) { const char *Fname; + const char *BinaryCacheRoot = ""; const char *Nm = "nm"; const char *Objdump = "objdump"; - if (!PyArg_ParseTuple(args, "s|ss", &Fname, &Nm, &Objdump)) + if (!PyArg_ParseTuple(args, "s|sss", &Fname, &BinaryCacheRoot, &Nm, &Objdump)) return NULL; try { - PerfReader P(Fname, Nm, Objdump); + PerfReader P(Fname, BinaryCacheRoot, Nm, Objdump); P.readHeader(); P.readAttrs(); P.readDataStream(); @@ -837,13 +931,17 @@ #else // STANDALONE int main(int argc, char **argv) { - PerfReader P(argv[1], std::cout); + Py_Initialize(); + if (argc < 2) return -1; + PerfReader P(argv[1], "", "nm", "objdump"); P.readHeader(); P.readAttrs(); P.readDataStream(); P.emitTopLevelCounters(); P.emitMaps(); - P.complete(); + PyObject_Print(P.complete(), stdout, Py_PRINT_RAW); + Py_FinalizeEx(); + return 0; } #endif diff --git a/lnt/testing/profile/perf.py b/lnt/testing/profile/perf.py --- a/lnt/testing/profile/perf.py +++ b/lnt/testing/profile/perf.py @@ -22,7 +22,8 @@ return f.read(8) == b'PERFILE2' @staticmethod - def deserialize(f, nm='nm', objdump='objdump', propagateExceptions=False): + def deserialize(f, binaryCacheRoot='', nm='nm', objdump='objdump', + propagateExceptions=False): f = f.name if os.path.getsize(f) == 0: diff --git a/lnt/testing/profile/profile.py b/lnt/testing/profile/profile.py --- a/lnt/testing/profile/profile.py +++ b/lnt/testing/profile/profile.py @@ -27,7 +27,15 @@ """ for impl in lnt.testing.profile.IMPLEMENTATIONS.values(): if impl.checkFile(f): - ret = impl.deserialize(open(f, 'rb')) + ret = None + with open(f, 'rb') as fd: + if impl is lnt.testing.profile.perf.LinuxPerfProfile: + ret = impl.deserialize(fd, + binaryCacheRoot = os.getenv('LNT_BINARY_CACHE_ROOT', ''), + nm = os.getenv('CMAKE_NM', 'nm'), + objdump = os.getenv('CMAKE_OBJDUMP', 'objdump')) + else: + ret = impl.deserialize(fd) if ret: return Profile(ret) else: