Index: llvm/include/llvm/XRay/Profile.h =================================================================== --- /dev/null +++ llvm/include/llvm/XRay/Profile.h @@ -0,0 +1,109 @@ +//===- Profile.h - XRay Profile Abstraction -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines the XRay Profile class representing the latency profile generated by +// XRay's profiling mode. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_XRAY_PROFILE_H +#define LLVM_XRAY_PROFILE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include +#include +#include + +namespace llvm { +namespace xray { + +class Profile; + +/// This function will attempt to load an XRay Profiling Mode profile from the +/// provided |Filename|. +Expected loadProfile(StringRef Filename); + +/// This algorithm will merge two Profile instances into a single Profile +/// instance, aggregating blocks by Thread ID. +Profile mergeProfilesByThread(const Profile &L, const Profile &R); + +/// Profile instances are thread-compatible. +class Profile { +public: + using ThreadID = uint64_t; + using PathID = unsigned; + using FuncID = int32_t; + + struct Data { + uint64_t CallCount; + uint64_t CumulativeLocalTime; + }; + + struct Block { + ThreadID Thread; + std::vector> PathData; + }; + + /// Provides a sequence of function IDs from a previously interned PathID. + Expected> path(PathID P) const; + + /// The stack represented in |P| must be in stack order (leaf to root). + PathID internPath(ArrayRef P); + + /// Appends a fully-formed Block instance into the Profile. + Error addBlock(Block &&B); + + // No copies. + Profile(const Profile &) = delete; + Profile &operator=(const Profile &) = delete; + + // Move only. + Profile(Profile &&) noexcept = default; + Profile &operator=(Profile &&) noexcept = default; + + Profile() = default; + ~Profile() = default; + +private: + using BlockList = std::list; + + struct TrieNode { + FuncID Func = 0; + std::vector Callees{}; + TrieNode *Caller = nullptr; + PathID ID = 0; + }; + + // List of blocks associated with a Profile. + BlockList Blocks; + + // List of TrieNode elements we've seen. + std::list NodeStorage; + + // List of call stack roots. + SmallVector Roots; + + // Reverse mapping between a PathID to a TrieNode*. + DenseMap PathIDMap; + + // Used to increment + PathID NextID = 1; + +public: + using const_iterator = BlockList::const_iterator; + const_iterator begin() const { return Blocks.begin(); } + const_iterator end() const { return Blocks.end(); } +}; + +} // namespace xray +} // namespace llvm + +#endif Index: llvm/include/llvm/XRay/Trace.h =================================================================== --- llvm/include/llvm/XRay/Trace.h +++ llvm/include/llvm/XRay/Trace.h @@ -18,7 +18,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" #include "llvm/XRay/XRayRecord.h" namespace llvm { @@ -62,7 +61,9 @@ }; /// This function will attempt to load XRay trace records from the provided -/// |Filename|. +/// |Filename|. If the file provided contains an XRay Profile, this will error +/// out and suggest to the user that the llvm/XRay/Profile.h header and +/// `loadProfile(...)` function be called instead. Expected loadTraceFile(StringRef Filename, bool Sort = false); } // namespace xray Index: llvm/lib/XRay/CMakeLists.txt =================================================================== --- llvm/lib/XRay/CMakeLists.txt +++ llvm/lib/XRay/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMXRay InstrumentationMap.cpp + Profile.cpp Trace.cpp ADDITIONAL_HEADER_DIRS Index: llvm/lib/XRay/Profile.cpp =================================================================== --- /dev/null +++ llvm/lib/XRay/Profile.cpp @@ -0,0 +1,280 @@ +//===- Profile.cpp - XRay Profile Abstraction -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines the XRay Profile class representing the latency profile generated by +// XRay's profiling mode. +// +//===----------------------------------------------------------------------===// +#include "llvm/XRay/Profile.h" + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include +#include + +namespace llvm { +namespace xray { + +namespace { + +struct BlockHeader { + uint32_t Size; + uint32_t Number; + uint64_t Thread; +}; + +static Expected readBlockHeader(DataExtractor &Extractor, + uint32_t &Offset) { + BlockHeader H; + uint32_t CurrentOffset = Offset; + H.Size = Extractor.getU32(&Offset); + if (Offset == CurrentOffset) + return make_error( + Twine("Error parsing block header size at offset '") + + Twine(CurrentOffset) + "'", + std::make_error_code(std::errc::invalid_argument)); + CurrentOffset = Offset; + H.Number = Extractor.getU32(&Offset); + if (Offset == CurrentOffset) + return make_error( + Twine("Error parsing block header number at offset '") + + Twine(CurrentOffset) + "'", + std::make_error_code(std::errc::invalid_argument)); + CurrentOffset = Offset; + H.Thread = Extractor.getU64(&Offset); + if (Offset == CurrentOffset) + return make_error( + Twine("Error parsing block header thread id at offset '") + + Twine(CurrentOffset) + "'", + std::make_error_code(std::errc::invalid_argument)); + return H; +} + +static Expected> readPath(DataExtractor &Extractor, + uint32_t &Offset) { + // We're reading a sequence of int32_t's until we find a 0. + std::vector Path; + auto CurrentOffset = Offset; + int32_t FuncId; + do { + FuncId = Extractor.getSigned(&Offset, 4); + if (CurrentOffset == Offset) + return make_error( + Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", + std::make_error_code(std::errc::invalid_argument)); + CurrentOffset = Offset; + Path.push_back(FuncId); + } while (FuncId != 0); + return std::move(Path); +} + +static Expected readData(DataExtractor &Extractor, + uint32_t &Offset) { + // We expect a certain number of elements for Data: + // - A 64-bit CallCount + // - A 64-bit CumulativeLocalTime counter + Profile::Data D; + auto CurrentOffset = Offset; + D.CallCount = Extractor.getU64(&Offset); + if (CurrentOffset == Offset) + return make_error( + Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + + "'", + std::make_error_code(std::errc::invalid_argument)); + CurrentOffset = Offset; + D.CumulativeLocalTime = Extractor.getU64(&Offset); + if (CurrentOffset == Offset) + return make_error( + Twine("Error parsing cumulative local time at offset '") + + Twine(CurrentOffset) + "'", + std::make_error_code(std::errc::invalid_argument)); + return D; +} + +} // namespace + +Profile::PathID Profile::internPath(ArrayRef Path) { + if (Path.empty()) + return 0; + + auto RevPath = reverse(Path); + + // Find the root. + auto It = RevPath.begin(); + auto PathRoot = *It++; + auto RootIt = + find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); + + // If we've not seen this root before, remember it. + TrieNode *Node = nullptr; + if (RootIt == Roots.end()) { + NodeStorage.emplace_back(); + Node = &NodeStorage.back(); + Node->Func = PathRoot; + Roots.push_back(Node); + } else { + Node = *RootIt; + } + + // Now traverse the path, re-creating if necessary. + while (It != RevPath.end()) { + auto NodeFuncID = *It++; + auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { + return N->Func == NodeFuncID; + }); + if (CalleeIt == Node->Callees.end()) { + NodeStorage.emplace_back(); + auto NewNode = &NodeStorage.back(); + NewNode->Func = NodeFuncID; + NewNode->Caller = Node; + Node->Callees.push_back(NewNode); + Node = NewNode; + } else { + Node = *CalleeIt; + } + } + + // At this point, Node *must* be pointing at the leaf. + assert(Node->Func == Path.front()); + if (Node->ID == 0) { + Node->ID = NextID++; + PathIDMap.insert({Node->ID, Node}); + } + return Node->ID; +} + +Error Profile::addBlock(Block &&B) { + if (B.Thread == 0) + return make_error( + "Block may not have Thread ID 0.", + std::make_error_code(std::errc::invalid_argument)); + if (B.PathData.empty()) + return make_error( + "Block may not have empty path data.", + std::make_error_code(std::errc::invalid_argument)); + + Blocks.emplace_back(std::move(B)); + return Error::success(); +} + +Expected> Profile::path(Profile::PathID P) const { + auto It = PathIDMap.find(P); + if (It == PathIDMap.end()) + return make_error( + Twine("PathID not found: ") + Twine(P), + std::make_error_code(std::errc::invalid_argument)); + std::vector Path; + for (auto Node = It->second; Node; Node = Node->Caller) + Path.push_back(Node->Func); + return std::move(Path); +} + +Profile mergeProfilesByThread(const Profile &L, const Profile &R) { + Profile Merged; + using PathDataMap = DenseMap; + using PathDataMapPtr = std::unique_ptr; + using PathDataVector = decltype(Profile::Block::PathData); + DenseMap ThreadProfileIndex; + for (const auto &Block : L) { + auto Res = ThreadProfileIndex.insert( + {Block.Thread, PathDataMapPtr{new PathDataMap()}}); + auto &It = Res.first; + for (const auto &PathAndData : Block.PathData) { + auto &PathID = PathAndData.first; + auto &Data = PathAndData.second; + auto NewPathID = Merged.internPath(cantFail(L.path(PathID))); + It->second->insert({NewPathID, Data}); + } + } + for (const auto &Block : R) { + auto Res = ThreadProfileIndex.insert( + {Block.Thread, PathDataMapPtr{new PathDataMap()}}); + auto &It = Res.first; + for (const auto &PathAndData : Block.PathData) { + auto &PathID = PathAndData.first; + auto &Data = PathAndData.second; + auto NewPathID = Merged.internPath(cantFail(R.path(PathID))); + PathDataMap::iterator PathDataIt; + bool Inserted; + std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); + if (!Inserted) { + auto &ExistingData = PathDataIt->second; + ExistingData.CallCount += Data.CallCount; + ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; + } + } + } + + for (const auto &IndexedThreadBlock : ThreadProfileIndex) { + PathDataVector PathAndData; + PathAndData.reserve(IndexedThreadBlock.second->size()); + copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); + cantFail( + Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); + } + return Merged; +} + +Expected loadProfile(StringRef Filename) { + int Fd; + if (auto EC = sys::fs::openFileForRead(Filename, Fd)) + return make_error( + Twine("Cannot read profile from '") + Filename + "'", EC); + + uint64_t FileSize; + if (auto EC = sys::fs::file_size(Filename, FileSize)) + return make_error( + Twine("Cannot get filesize of '") + Filename + "'", EC); + + std::error_code EC; + sys::fs::mapped_file_region MappedFile( + Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); + if (EC) + return make_error( + Twine("Cannot mmap profile '") + Filename + "'", EC); + StringRef Data(MappedFile.data(), MappedFile.size()); + + Profile P; + uint32_t Offset = 0; + DataExtractor Extractor(Data, true, 8); + + // For each block we get from the file: + while (Offset != MappedFile.size()) { + auto HeaderOrError = readBlockHeader(Extractor, Offset); + if (!HeaderOrError) + return HeaderOrError.takeError(); + + // TODO: Maybe store this header information for each block, even just for + // debugging? + const auto &Header = HeaderOrError.get(); + + // Read in the path data. + auto PathOrError = readPath(Extractor, Offset); + if (!PathOrError) + return PathOrError.takeError(); + const auto &Path = PathOrError.get(); + + // For each path we encounter, we should intern it to get a PathID. + auto DataOrError = readData(Extractor, Offset); + if (!DataOrError) + return DataOrError.takeError(); + auto &Data = DataOrError.get(); + + if (auto E = + P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, + {{P.internPath(Path), std::move(Data)}}})) + return std::move(E); + } + + return P; +} + +} // namespace xray +} // namespace llvm Index: llvm/lib/XRay/Trace.cpp =================================================================== --- llvm/lib/XRay/Trace.cpp +++ llvm/lib/XRay/Trace.cpp @@ -30,16 +30,34 @@ // FIXME: Maybe deduce whether the data is little or big-endian using some // magic bytes in the beginning of the file? - // First 32 bytes of the file will always be the header. We assume a certain - // format here: + // First 32 bytes of the file will always be the header. For FDR and Basic + // mode, we assume a specific format: // // (2) uint16 : version // (2) uint16 : type // (4) uint32 : bitfield // (8) uint64 : cycle frequency // (16) - : padding + // + // We also look for the Profiling Mode output, which uses an alternate data + // format using a set of magic bytes at the beginning of the file. If we see + // the first 64 bits (16 bytes) to be that set of magic bytes, then we bail + // out advising the caller to use a separate loader. We're doing this because + // the data format for the profiling mode doesn't provide access to specific + // events compared to FDR and Basic mode. DataExtractor HeaderExtractor(Data, true, 8); + + { + uint32_t OffsetPtr = 0; + uint64_t MagicBytes = HeaderExtractor.getU64(&OffsetPtr); + if (MagicBytes == 0x7872617970726f66) + return make_error( + Twine("Detected XRay profiling mode file; use the `Profile` loader " + "instead."), + std::make_error_code(std::errc::invalid_argument)); + } + uint32_t OffsetPtr = 0; FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr); FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr); @@ -122,8 +140,8 @@ return make_error( Twine("Corrupted log, found arg payload following non-matching " "function + thread record. Record for function ") + - Twine(Record.FuncId) + " != " + Twine(FuncId) + "; offset: " + - Twine(S.data() - Data.data()), + Twine(Record.FuncId) + " != " + Twine(FuncId) + + "; offset: " + Twine(S.data() - Data.data()), std::make_error_code(std::errc::executable_format_error)); // Advance another four bytes to avoid padding. OffsetPtr += 4; @@ -708,9 +726,9 @@ if (Sort) std::stable_sort(T.Records.begin(), T.Records.end(), - [&](const XRayRecord &L, const XRayRecord &R) { - return L.TSC < R.TSC; - }); + [&](const XRayRecord &L, const XRayRecord &R) { + return L.TSC < R.TSC; + }); return std::move(T); } Index: llvm/tools/llvm-xray/xray-stacks.cpp =================================================================== --- llvm/tools/llvm-xray/xray-stacks.cpp +++ llvm/tools/llvm-xray/xray-stacks.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/XRay/Graph.h" #include "llvm/XRay/InstrumentationMap.h" +#include "llvm/XRay/Profile.h" #include "llvm/XRay/Trace.h" using namespace llvm; @@ -723,9 +724,18 @@ symbolize::LLVMSymbolizer Symbolizer(Opts); FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer, Map.getFunctionAddresses()); + // TODO: Someday, support output to files instead of just directly to // standard output. for (const auto &Filename : StackInputs) { + // First, we try to load the file using the Profile loader. If we happen to + // get a Profile object, we can then translate those directly into the + // format that we need. + auto ProfileOrErr = loadProfile(Filename); + if (!ProfileOrErr) + logAllUnhandledErrors(ProfileOrErr.takeError(), errs(), ""); + + // TODO: Handle Profile objects. auto TraceOrErr = loadTraceFile(Filename); if (!TraceOrErr) { if (!StackKeepGoing) Index: llvm/unittests/XRay/CMakeLists.txt =================================================================== --- llvm/unittests/XRay/CMakeLists.txt +++ llvm/unittests/XRay/CMakeLists.txt @@ -1,9 +1,11 @@ set(LLVM_LINK_COMPONENTS Support + XRay ) add_llvm_unittest(XRayTests GraphTest.cpp + ProfileTest.cpp ) add_dependencies(XRayTests intrinsics_gen) Index: llvm/unittests/XRay/ProfileTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/XRay/ProfileTest.cpp @@ -0,0 +1,70 @@ +//===- ProfileTest.cpp - XRay Profile unit tests ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/XRay/Profile.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace llvm { +namespace xray { +namespace { + +using ::testing::AllOf; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Field; +using ::testing::Not; +using ::testing::Pair; + +TEST(ProfileTest, CreateProfile) { Profile P; } + +TEST(ProfileTest, InternPath) { + Profile P; + auto Path0 = P.internPath({3, 2, 1}); + auto Path1 = P.internPath({3, 2, 1}); + auto Path2 = P.internPath({2, 1}); + EXPECT_THAT(Path0, Eq(Path1)); + EXPECT_THAT(Path0, Not(Eq(Path2))); +} + +TEST(ProfileTest, AddBlocks) { + Profile P; + EXPECT_TRUE(errorToBool(P.addBlock({}))); + EXPECT_TRUE(errorToBool(P.addBlock({1, {}}))); + EXPECT_FALSE(errorToBool(P.addBlock( + Profile::Block{Profile::ThreadID{1}, + { + {P.internPath({2, 1}), Profile::Data{1, 1000}}, + {P.internPath({3, 2, 1}), Profile::Data{10, 100}}, + }}))); +} + +TEST(ProfileTest, MergeProfilesByThread) { + Profile P0, P1; + EXPECT_FALSE(errorToBool(P0.addBlock( + Profile::Block{Profile::ThreadID{1}, + {{P0.internPath({2, 1}), Profile::Data{1, 1000}}}}))); + EXPECT_FALSE(errorToBool(P1.addBlock( + Profile::Block{Profile::ThreadID{1}, + {{P1.internPath({2, 1}), Profile::Data{1, 1000}}}}))); + + Profile Merged = mergeProfilesByThread(P0, P1); + EXPECT_THAT(Merged, + ElementsAre(AllOf( + Field(&Profile::Block::Thread, Eq(Profile::ThreadID{1})), + Field(&Profile::Block::PathData, + ElementsAre(Pair( + Merged.internPath({2, 1}), + AllOf(Field(&Profile::Data::CallCount, Eq(2u)), + Field(&Profile::Data::CumulativeLocalTime, + Eq(2000u))))))))); +} + +} // namespace +} // namespace xray +} // namespace llvm