diff --git a/lnt/testing/profile/cPerf.cpp b/lnt/testing/profile/cPerf.cpp
--- a/lnt/testing/profile/cPerf.cpp
+++ b/lnt/testing/profile/cPerf.cpp
@@ -130,30 +130,6 @@
   throw std::logic_error(Str);
 }
 
-// Returns true if the ELF file given by filename
-// is a shared object (DYN).
-bool IsSharedObject(std::string Fname) {
-  // We replicate the first part of an ELF header here
-  // so as not to rely on <elf.h>.
-  struct PartialElfHeader {
-    unsigned char e_ident[16];
-    uint16_t e_type;
-  };
-  const int ET_DYN = 3;
-
-  FILE *stream = fopen(Fname.c_str(), "r");
-  if (stream == NULL)
-    return false;
-
-  PartialElfHeader H;
-  auto NumRead = fread(&H, 1, sizeof(H), stream);
-  assert(NumRead == sizeof(H));
-
-  fclose(stream);
-
-  return H.e_type == ET_DYN;
-}
-
 //===----------------------------------------------------------------------===//
 // Perf structures. Taken from https://lwn.net/Articles/644919/
 //===----------------------------------------------------------------------===//
@@ -188,6 +164,21 @@
   uint64_t flags1[3];
 };
 
+struct perf_event_attr {
+  uint32_t type;
+  uint32_t size;
+  uint64_t config;
+  uint64_t sample_period;
+  uint64_t sample_type;
+  uint64_t read_format;
+  uint64_t flags;
+  uint32_t wakeup_events;
+  uint32_t bp_type;
+  uint64_t bp_addr;
+  uint64_t bp_len;
+  uint64_t branch_sample_type;
+};
+
 struct perf_event_header {
   uint32_t type;
   uint16_t misc;
@@ -235,12 +226,74 @@
   uint64_t id;
 };
 
+enum perf_type_id {
+  PERF_TYPE_HARDWARE = 0,
+  PERF_TYPE_SOFTWARE = 1,
+  PERF_TYPE_TRACEPOINT = 2,
+  PERF_TYPE_HW_CACHE = 3,
+  PERF_TYPE_RAW = 4,
+  PERF_TYPE_BREAKPOINT = 5,
+  PERF_TYPE_MAX
+};
+
+enum perf_hw_id {
+  PERF_COUNT_HW_CPU_CYCLES = 0,
+  PERF_COUNT_HW_INSTRUCTIONS = 1,
+  PERF_COUNT_HW_CACHE_REFERENCES = 2,
+  PERF_COUNT_HW_CACHE_MISSES = 3,
+  PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+  PERF_COUNT_HW_BRANCH_MISSES = 5,
+  PERF_COUNT_HW_BUS_CYCLES = 6,
+  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+  PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+  PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+  PERF_COUNT_HW_MAX
+};
+
+static const char* hw_event_names[PERF_COUNT_HW_MAX] = {
+  "cycles",
+  "instructions",
+  "cache-references",
+  "cache-misses",
+  "branch-instructions",
+  "branch-misses",
+  "bus-cycles",
+  "stalled-cycles-frontend",
+  "stalled-cycles-backend",
+  "ref-cpu-cycles"
+};
+
+enum perf_sw_ids {
+  PERF_COUNT_SW_CPU_CLOCK = 0,
+  PERF_COUNT_SW_TASK_CLOCK = 1,
+  PERF_COUNT_SW_PAGE_FAULTS = 2,
+  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+  PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+  PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+  PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+  PERF_COUNT_SW_EMULATION_FAULTS = 8,
+  PERF_COUNT_SW_MAX
+};
+
+static const char* sw_event_names[PERF_COUNT_SW_MAX] = {
+  "cpu-clock",
+  "task-clock",
+  "page-faults",
+  "context-switches",
+  "cpu-migrations",
+  "minor-faults",
+  "major-faults",
+  "alignment-faults",
+  "emulation-faults"
+};
+
 //===----------------------------------------------------------------------===//
 // Readers for nm and objdump output
 //===----------------------------------------------------------------------===//
 
 struct Map {
-  uint64_t Start, End;
+  uint64_t Start, End, PgOff;
   const char *Filename;
 };
 
@@ -337,7 +390,7 @@
     while (std::getline(ss, token, delim)) {
       output.push_back(token);
     }
-    return output.size();
+    return (int)output.size();
   }
 };
 
@@ -427,6 +480,7 @@
 
   void readHeader();
   void readAttrs();
+  void readEventDesc();
   void readDataStream();
   unsigned char *readEvent(unsigned char *);
   perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout);
@@ -440,8 +494,7 @@
   void emitSymbol(
       Symbol &Sym, Map &M,
       std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
-      std::map<const char *, uint64_t> &SymEvents,
-      uint64_t Adjust);
+      std::map<const char *, uint64_t> &SymEvents);
   PyObject *complete();
 
 private:
@@ -495,16 +548,59 @@
     Buf = readEvent(Buf);
 }
 
+// Index of the event-description feature bit tested in Header->flags.
+static const int HEADER_EVENT_DESC = 12;
+
+// Populates EventIDs and EventLayouts. When the HEADER_EVENT_DESC flag is
+// set this defers to readEventDesc(); otherwise it walks the attrs section
+// and names hardware/software events from the static name tables.
 void PerfReader::readAttrs() {
-  const int HEADER_EVENT_DESC = 12;
+  if (Header->flags & (1ULL << HEADER_EVENT_DESC)) {
+    readEventDesc();
+  } else {
+    // Treat a zero attr_size as "no events" instead of dividing by zero.
+    uint64_t NumEvents = Header->attr_size ? Header->attrs.size / Header->attr_size : 0;
+    for (uint64_t I = 0; I < NumEvents; ++I) {
+      const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size];
+      const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size);
+      unsigned char* Buf = &Buffer[ids->offset];
+      uint64_t NumIDs = ids->size / sizeof(uint64_t);
+
+      const char* Str = "unknown";
+      switch (attr->type) {
+      case PERF_TYPE_HARDWARE:
+        if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config];
+        break;
+      case PERF_TYPE_SOFTWARE:
+        if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config];
+        break;
+      }
+
+      // Weirdness of perf: if there is only one event descriptor, that
+      // event descriptor can be referred to by ANY id!
+      if (NumEvents == 1 && NumIDs == 0) {
+        EventIDs[0] = Str;
+        EventLayouts[0] = attr->sample_type;
+      }
+
+      for (uint64_t J = 0; J < NumIDs; ++J) {
+        auto id = TakeU64(Buf);
+        EventIDs[id] = Str;
+        EventLayouts[id] = attr->sample_type;
+      }
+    }
+  }
+}
+
+// Parses the HEADER_EVENT_DESC feature section, found by skipping one
+// perf_file_section for every lower-numbered feature flag that is set.
+void PerfReader::readEventDesc() {
   perf_file_section *P =
       (perf_file_section *)&Buffer[Header->data.offset + Header->data.size];
   for (int I = 0; I < HEADER_EVENT_DESC; ++I)
-    if (Header->flags & (1U << I))
+    if (Header->flags & (1ULL << I))
       ++P;
 
-  assert(Header->flags & (1U << HEADER_EVENT_DESC));
-
   unsigned char *Buf = &Buffer[P->offset];
   uint32_t NumEvents = TakeU32(Buf);
   uint32_t AttrSize = TakeU32(Buf);
@@ -537,12 +633,13 @@
 }
 
 unsigned char *PerfReader::readEvent(unsigned char *Buf) {
-  perf_event_sample *E = (perf_event_sample *)Buf;
-
-  if (E->header.type == PERF_RECORD_MMAP) {
+  perf_event_header *E = (perf_event_header *)Buf;
+  switch (E->type) {
+  case PERF_RECORD_MMAP:
+    {
     perf_event_mmap *E = (perf_event_mmap *)Buf;
     auto MapID = Maps.size();
-    Maps.push_back({E->start, E->start + E->extent, E->filename});
+    Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename});
 
     // FIXME: use EventLayouts.begin()->second!
     perf_sample_id *ID =
@@ -550,10 +647,12 @@
     auto &CurrentMap = CurrentMaps[ID->time];
     CurrentMap.insert({E->start, MapID});
   }
-  if (E->header.type == PERF_RECORD_MMAP2) {
+    break;
+  case PERF_RECORD_MMAP2:
+    {
     perf_event_mmap2 *E = (perf_event_mmap2 *)Buf;
     auto MapID = Maps.size();
-    Maps.push_back({E->start, E->start + E->extent, E->filename});
+    Maps.push_back({E->start, E->start + E->extent, E->pgoff, E->filename});
 
     // FIXME: use EventLayouts.begin()->second!
     perf_sample_id *ID =
@@ -561,44 +660,45 @@
     auto &CurrentMap = CurrentMaps[ID->time];
     CurrentMap.insert({E->start, MapID});
   }
+    break;
+  case PERF_RECORD_SAMPLE:
+    {
+      perf_event_sample* E = (perf_event_sample*)Buf;
+      auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header),
+                             EventLayouts.begin()->second);
+      auto EventID = NewE.id;
+      auto RawPC = NewE.ip;
+
+      // Search for the map corresponding to this sample. Search backwards through
+      // time, discarding any maps created after our timestamp.
+      uint64_t MapID = ~0ULL;
+      for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend();
+           I != E; ++I) {
+        if (I->first > NewE.time)
+          continue;
 
-  if (E->header.type != PERF_RECORD_SAMPLE)
-    return &Buf[E->header.size];
-
-  auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header),
-                         EventLayouts.begin()->second);
-  auto EventID = NewE.id;
-  auto PC = NewE.ip;
-
-  // Search for the map corresponding to this sample. Search backwards through
-  // time, discarding any maps created after our timestamp.
-  size_t MapID = ~0UL;
-  for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend();
-       I != E; ++I) {
-    if (I->first > NewE.time)
-      continue;
-
-    auto NewI = I->second.upper_bound(PC);
-    if (NewI == I->second.begin())
-      continue;
-    --NewI;
-
-    if (NewI->first > PC)
-      continue;
-    MapID = NewI->second;
-    break;
-  }
-  if (MapID == ~0UL)
-    return &Buf[E->header.size];
-  assert(MapID != ~0UL);
-
-  assert(EventIDs.count(EventID));
-  Events[MapID][PC][EventIDs[EventID]] += NewE.period;
+        auto NewI = I->second.upper_bound(RawPC);
+        if (NewI == I->second.begin())
+          continue;
+        --NewI;
 
-  TotalEvents[EventIDs[EventID]] += NewE.period;
-  TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period;
+        if (NewI->first > RawPC)
+          continue;
+        MapID = NewI->second;
+        break;
+      }
+      if (MapID != ~0ULL) {
+        auto DSOPC = RawPC - Maps[MapID].Start + Maps[MapID].PgOff;
+        assert(EventIDs.count(EventID));
+        Events[MapID][DSOPC][EventIDs[EventID]] += NewE.period;
 
-  return &Buf[E->header.size];
+        TotalEvents[EventIDs[EventID]] += NewE.period;
+        TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period;
+      }
+    }
+    break;
+  }
+  return &Buf[E->size];
 }
 
 perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) {
@@ -706,8 +806,6 @@
-    // EXEC ELF objects aren't relocated. DYN ones are,
-    // so if it's a DYN object adjust by subtracting the
-    // map base.
-    bool IsSO = IsSharedObject(Maps[MapID].Filename);
-    uint64_t Adjust = IsSO ? Maps[MapID].Start : 0;
+    // Event addresses were already made DSO-relative in readEvent()
+    // (RawPC - Map.Start + Map.PgOff), so they are compared against
+    // symbol addresses directly with no per-object adjustment.
 
     NmOutput Syms(Nm, BinaryCacheRoot);
     Syms.reset(&Maps[MapID]);
@@ -718,7 +816,7 @@
     std::map<uint64_t, std::map<const char *, uint64_t>> SymToEventTotals;
     while (Event != MapEvents.end() && Sym != Syms.end()) {
       // Skip events until we find one after the start of Sym
-      auto PC = Event->first - Adjust;
+      auto PC = Event->first;
       if (PC < Sym->Start) {
         ++Event;
         continue;
@@ -745,7 +843,7 @@
       }
       if (Keep)
         emitSymbol(Sym, Maps[MapID], MapEvents.lower_bound(Sym.Start),
-                   SymToEventTotals[Sym.Start], Adjust);
+                   SymToEventTotals[Sym.Start]);
     }
   }
 }
@@ -753,15 +851,14 @@
 void PerfReader::emitSymbol(
     Symbol &Sym, Map &M,
     std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
-    std::map<const char *, uint64_t> &SymEvents,
-    uint64_t Adjust) {
+    std::map<const char *, uint64_t> &SymEvents) {
   ObjdumpOutput Dump(Objdump, BinaryCacheRoot);
   Dump.reset(&M, Sym.Start, Sym.End);
   Dump.next();
 
   emitFunctionStart(Sym.Name);
   for (uint64_t I = Sym.Start; I < Sym.End; I = Dump.next()) {
-    auto PC = Event->first - Adjust;
+    auto PC = Event->first;
 
     auto Text = Dump.getText();
     if (PC == I) {