diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h --- a/llvm/include/llvm/CodeGen/MachineStableHash.h +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -14,17 +14,27 @@ #ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H #define LLVM_CODEGEN_MACHINESTABLEHASH_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/StableHashing.h" namespace llvm { class MachineInstr; class MachineOperand; +/// \returns a stable_hash for MachineOperand \p MO stable_hash stableHashValue(const MachineOperand &MO); + +/// \returns a stable_hash of all the hashes of the opcode and MachineOperands +/// of MachineInstr \p MI. stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, bool HashConstantPoolIndices = false, bool HashMemOperands = false); +/// \returns a collection of stable_hashes for each of the MachineInstrs of +/// a given MachineBasicBlock from iterator \p Begin to \p End. +std::vector +stableHashMachineInstrs(const MachineBasicBlock::iterator &Begin, + const MachineBasicBlock::iterator &End); } // namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/StableHashTree.h b/llvm/include/llvm/CodeGen/StableHashTree.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/StableHashTree.h @@ -0,0 +1,176 @@ +//===-- StableHashTree.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains a stable hash tree implementation based on llvm::stable_hash. +/// A StableHashTree is a Trie that contains sequences of hash values. This +/// data structure is generic, and is intended for use cases where something +/// similar to a trie is already in use. The upside to using a StableHashTree is +/// that it can be used to understand data collected across modules, or it can +/// be used to serialize data about a build to disk for use in a future build. +/// +/// To use a StableHashTree you must already have a way to take some sequence of +/// data and use llvm::stable_hash to turn that sequence into a +/// std::vector (ie StableHashSequence). Each of these hash +/// sequences can be inserted into a StableHashTree where the beginning of a +/// unique sequence starts from the root of the tree and ends at a Terminal +/// (IsTerminal) node. +/// +/// This StableHashTree was originally implemented as part of the EuroLLVM 2020 +/// talk "Global Machine Outliner for ThinLTO": +/// +/// https://llvm.org/devmtg/2020-04/talks.html#TechTalk_58 +/// +/// This talk covers how a global stable hash tree is used to collect +/// information about valid MachineOutliner Candidates across modules, and used +/// to inform modules where matching candidates are encountered but occur in +/// less frequency and as a result are ignored by the MachineOutliner had there +/// not been a global stable hash tree in use (assuming FullLTO is disabled). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_STABLEHASHTREE_H +#define LLVM_CODEGEN_STABLEHASHTREE_H + +#include +#include +#include +#include + +#include "llvm/CodeGen/StableHashing.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// \brief A HashNode is an entry in a StableHashTree that contains a value Hash +/// as well as a collection of Successors (which are other HashNodes that are +/// part of a sequence of llvm::stable_hashes). A HashNode might +/// be IsTerminal meaning that it represents the end of a stable_hash sequence. +struct HashNode { + stable_hash Hash = 0LL; + bool IsTerminal{false}; + std::unordered_map> Successors; +}; + +struct StableHashTree { + + /// Graph traversal callback types. + ///{ + using EdgeCallbackFn = + std::function; + using NodeCallbackFn = std::function; + ///} + + using StableHashSequence = std::vector; + + /// Walks every edge and node in the StableHashTree and calls CallbackEdge + /// for the edges and CallbackNode for the nodes with the stable_hash for + /// the source and the stable_hash of the sink for an edge. These generic + /// callbacks can be used to traverse a StableHashTree for the purpose of + /// print debugging or serializing it. + void walkGraph(EdgeCallbackFn CallbackEdge, + NodeCallbackFn CallbackNode) const; + + /// Walks the nodes of a StableHashTree using walkGraph. + void walkVertices(NodeCallbackFn Callback) const { + walkGraph([](const HashNode *A, const HashNode *B) {}, Callback); + } + + /// Uses walkVertices to print a StableHashTree. + /// If a \p DebugMap is provided, then it will be used to provide richer + /// output. + void print(raw_ostream &OS = llvm::errs(), + std::unordered_map DebugMap = {}) const; + + void dump() const { print(llvm::errs()); } + + /// Builds a StableHashTree from a \p Buffer. + /// The serialization format here should be considered opaque, and may change. + /// \returns llvm::Error::ErrorSuccess if successful, otherwise returns some + /// other llvm::Error error kind. + llvm::Error readFromBuffer(StringRef Buffer); + + /// Serializes a StableHashTree from a file at \p Filename. + llvm::Error readFromFile(StringRef Filename) { + llvm::SmallString<256> Filepath(Filename); + auto FileOrError = + llvm::vfs::getRealFileSystem()->getBufferForFile(Filepath); + if (!FileOrError) + return llvm::errorCodeToError(FileOrError.getError()); + return readFromBuffer(FileOrError.get()->getBuffer()); + } + + /// Serializes a StableHashTree to a file at \p Filename. + llvm::Error writeToFile(StringRef Filename) const { + std::error_code EC; + llvm::raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_Text); + if (EC) + return llvm::createStringError(EC, "Unable to open StableHashTree Data"); + // Note: For now the format is the same as the print output, but this can + // change. + print(OS); + OS.flush(); + return llvm::Error::success(); + } + + void insert(const std::vector &Sequences) { + for (const auto &Sequence : Sequences) + insert(Sequence); + } + + /// \returns true if \p Sequence exists in a StableHashTree, false + /// otherwise. + bool find(const StableHashSequence &Sequence) const; + + /// \returns the size of a StableHashTree by traversing it. If + /// \p GetTerminalCountOnly is true, it only counts the terminal nodes + /// (meaning it returns the size of the number of hash sequences in a + /// StableHashTree). + size_t size(bool GetTerminalCountOnly = false) const { + size_t Size = 0; + walkVertices([&Size, GetTerminalCountOnly](const HashNode *N) { + Size += (N && (!GetTerminalCountOnly || N->IsTerminal)); + }); + return Size; + } + + size_t depth() const { + size_t Size = 0; + + std::unordered_map DepthMap; + + walkGraph( + [&DepthMap](const HashNode *Src, const HashNode *Dst) { + size_t Depth = DepthMap[Src]; + DepthMap[Dst] = Depth + 1; + }, + [&Size, &DepthMap](const HashNode *N) { + Size = std::max(Size, DepthMap[N]); + }); + + return Size; + } + +private: + /// StableHashTree is a compact representation of a set of stable_hash + /// sequences. It allows for for efficient walking of these sequences for + /// matching purposes. HashTreeImpl is the root node of this tree. Its Hash + /// value is 0, and its Successors are the beginning of StableHashSequences + /// inserted into the StableHashTree. + HashNode HashTreeImpl; + + /// Inserts a \p Sequence into a StableHashTree. The last node in the sequence + /// will set IsTerminal to true in StableHashTree. + void insert(const StableHashSequence &Sequence); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -94,6 +94,7 @@ MachineOptimizationRemarkEmitter.cpp MachineOutliner.cpp MachinePassManager.cpp + StableHashTree.cpp MachinePipeliner.cpp MachinePostDominators.cpp MachineRegionInfo.cpp diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -192,3 +192,17 @@ return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); } + +std::vector +llvm::stableHashMachineInstrs(const MachineBasicBlock::iterator &Begin, + const MachineBasicBlock::iterator &End) { + std::vector Sequence; + for (auto I = Begin; I != End; I++) { + const MachineInstr &MI = *I; + stable_hash Hash = stableHashValue(MI); + if (!Hash) + return {}; + Sequence.push_back(Hash); + } + return Sequence; +} diff --git a/llvm/lib/CodeGen/StableHashTree.cpp b/llvm/lib/CodeGen/StableHashTree.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/StableHashTree.cpp @@ -0,0 +1,205 @@ +//===---- StableHashTree.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/StableHashTree.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/StableHashing.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "stable-hash-tree" + +using namespace llvm; + +namespace llvm { + +void StableHashTree::walkGraph(EdgeCallbackFn CallbackEdge, + NodeCallbackFn CallbackNode) const { + std::stack Stack; + Stack.push(&HashTreeImpl); + + while (!Stack.empty()) { + const auto *Current = Stack.top(); + Stack.pop(); + CallbackNode(Current); + for (const auto &P : Current->Successors) { + CallbackEdge(Current, P.second.get()); + Stack.push(P.second.get()); + } + } +} + +void StableHashTree::print( + llvm::raw_ostream &OS, + std::unordered_map DebugMap) const { + + std::unordered_map NodeMap; + + walkVertices([&NodeMap](const HashNode *Current) { + size_t Index = NodeMap.size(); + NodeMap[Current] = Index; + assert(Index = NodeMap.size() + 1 && + "Expected size of ModeMap to increment by 1"); + }); + + bool IsFirstEntry = true; + OS << "{"; + for (const auto &Entry : NodeMap) { + if (!IsFirstEntry) + OS << ","; + OS << "\n"; + IsFirstEntry = false; + OS << " \"" << Entry.second << "\" : {\n"; + OS << " \"hash\" : \""; + OS.raw_ostream::write_hex(Entry.first->Hash); + OS << "\",\n"; + + OS << " \"isTerminal\" : " + << "\"" << (Entry.first->IsTerminal ? "true" : "false") << "\",\n"; + + // For debugging we want to provide a string representation of the hashing + // source, such as a MachineInstr dump, etc. Not intended for production. + auto MII = DebugMap.find(Entry.first->Hash); + if (MII != DebugMap.end()) + OS << " \"source\" : \"" << MII->second << "\",\n"; + + OS << " \"neighbors\" : ["; + + bool IsFirst = true; + for (const auto &Adj : Entry.first->Successors) { + if (!IsFirst) + OS << ","; + IsFirst = false; + OS << " \""; + OS << NodeMap[Adj.second.get()]; + OS << "\" "; + } + + OS << "]\n }"; + } + OS << "\n}\n"; + OS.flush(); +} + +llvm::Error StableHashTree::readFromBuffer(StringRef Buffer) { + auto Json = llvm::json::parse(Buffer); + if (!Json) + return Json.takeError(); + + const json::Object *JO = Json.get().getAsObject(); + if (!JO) + return llvm::createStringError(std::error_code(), "Bad Json"); + + std::unordered_map JsonMap; + for (const auto &E : *JO) + JsonMap[std::stoul(E.first.str())] = &E.second; + + assert(JsonMap.find(0x0) != JsonMap.end() && "Expected a root HashTree node"); + + // We have a JsonMap and a NodeMap. We walk the JSON form of the HashTree + // using the JsonMap by using the stack of JSON IDs. As we walk we used the + // IDs to get the currwent JSON Node and the current HashNode. + std::unordered_map NodeMap; + std::stack Stack; + Stack.push(0); + NodeMap[0] = &HashTreeImpl; + + while (!Stack.empty()) { + unsigned Current = Stack.top(); + Stack.pop(); + + HashNode *CurrentSubtree = NodeMap[Current]; + const auto *CurrentJson = JsonMap[Current]->getAsObject(); + + std::vector Neighbors; + llvm::transform(*CurrentJson->get("neighbors")->getAsArray(), + std::back_inserter(Neighbors), + [](const llvm::json::Value &S) { + return std::stoull(S.getAsString()->str()); + }); + + stable_hash Hash = std::stoull( + CurrentJson->get("hash")->getAsString()->str(), nullptr, 16); + CurrentSubtree->Hash = Hash; + + std::string IsTerminalStr = + StringRef(CurrentJson->get("isTerminal")->getAsString()->str()).lower(); + CurrentSubtree->IsTerminal = + IsTerminalStr == "true" || IsTerminalStr == "on"; + + for (auto N : Neighbors) { + auto I = JsonMap.find(N); + if (I == JsonMap.end()) + return llvm::createStringError(std::error_code(), + "Missing neighbor in JSON"); + + std::unique_ptr Neighbor = std::make_unique(); + HashNode *NeighborPtr = Neighbor.get(); + stable_hash StableHash = std::stoull( + I->second->getAsObject()->get("hash")->getAsString()->str(), nullptr, + 16); + CurrentSubtree->Successors.emplace(StableHash, std::move(Neighbor)); + NodeMap[N] = NeighborPtr; + + Stack.push(I->first); + } + } + + return llvm::Error::success(); +} + +void StableHashTree::insert(const StableHashSequence &Sequence) { + HashNode *Current = &HashTreeImpl; + for (stable_hash StableHash : Sequence) { + auto I = Current->Successors.find(StableHash); + if (I == Current->Successors.end()) { + std::unique_ptr Next = std::make_unique(); + HashNode *NextPtr = Next.get(); + NextPtr->Hash = StableHash; + Current->Successors.emplace(StableHash, std::move(Next)); + Current = NextPtr; + continue; + } + Current = I->second.get(); + } + Current->IsTerminal = true; +} + +bool StableHashTree::find(const StableHashSequence &Sequence) const { + const HashNode *Current = &HashTreeImpl; + for (stable_hash StableHash : Sequence) { + const auto I = Current->Successors.find(StableHash); + if (I == Current->Successors.end()) + return false; + Current = I->second.get(); + } + return Current->IsTerminal; +} + +} // namespace llvm diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -23,6 +23,7 @@ LexicalScopesTest.cpp MachineInstrBundleIteratorTest.cpp MachineInstrTest.cpp + StableHashTreeTest.cpp MachineOperandTest.cpp PassManagerTest.cpp ScalableVectorMVTsTest.cpp diff --git a/llvm/unittests/CodeGen/StableHashTreeTest.cpp b/llvm/unittests/CodeGen/StableHashTreeTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/CodeGen/StableHashTreeTest.cpp @@ -0,0 +1,112 @@ +//===- StableHashTreeTest.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/StableHashTree.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +// Include helper functions to ease the manipulation of MachineFunctions. +#include "MFCommon.inc" + +TEST(HashBasicBlock, StableHashTreeTest) { + LLVMContext Ctx; + Module Mod("Module", Ctx); + auto MF = createMachineFunction(Ctx, Mod); + + MCInstrDesc MCID1 = {0, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + MCInstrDesc MCID2 = {1, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + MCInstrDesc MCID3 = {2, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + MCInstrDesc MCID4 = {3, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + MCInstrDesc MCID5 = {4, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + + std::vector> InstrSequences = { + {MCID1, MCID2, MCID4}, + {MCID1, MCID3, MCID4}, + {MCID1, MCID3, MCID4, MCID5}, + }; + + // Populate a Stable Hash Tree with Machine Instructions. Because this is a + // Hash Trie the series of instructions should overlap and result in a tree + // that is of depth 4 and of size 7. + bool IsFirst = true; + StableHashTree Tree; + for (auto &IS : InstrSequences) { + auto *MBB = MF->CreateMachineBasicBlock(); + for (auto &MCID : IS) { + auto *MI = MF->CreateMachineInstr(MCID, DebugLoc()); + MBB->insert(MBB->end(), MI); + } + + auto BI = MBB->begin(); + std::vector> HashList = { + llvm::stableHashMachineInstrs(BI, MBB->end())}; + Tree.insert(HashList); + + if (IsFirst) { + IsFirst = false; + ASSERT_TRUE(Tree.depth() == 3); + } + } + + // Check depth and size of this tree as expected above. + ASSERT_TRUE(Tree.depth() == 4); + ASSERT_TRUE(Tree.size() == 7); + + // Since the purpose of Stable Hash Tree is partly for serializing, test + // print. + std::string Str; + raw_string_ostream Sstr(Str); + Tree.print(Sstr); + + // Now test that `Tree` is deserialized to a string, serialize it into Tree2. + StableHashTree Tree2; + if (auto Err = Tree2.readFromBuffer(StringRef(Str))) + consumeError(std::move(Err)); + + // Because the HashNode uses an unordered_map as a successor as we walk to + // compare `Tree` and `Tree2` we must insert the hash values into the sorted + // std::map and std::set for proper comparison. + std::map> HashValueMap1; + std::map> HashValueMap2; + + Tree.walkVertices([&HashValueMap1](const HashNode *N) { + for (const auto &Succ : N->Successors) + HashValueMap1[N->Hash].insert(Succ.first); + }); + + Tree2.walkVertices([&HashValueMap2](const HashNode *N) { + for (const auto &Succ : N->Successors) + HashValueMap2[N->Hash].insert(Succ.first); + }); + + ASSERT_TRUE(std::equal(HashValueMap1.begin(), HashValueMap1.end(), + HashValueMap2.begin())); +} + +} // end namespace