diff --git a/llvm/include/llvm/MC/MCSystemModel.h b/llvm/include/llvm/MC/MCSystemModel.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MC/MCSystemModel.h @@ -0,0 +1,720 @@ +//=== MC/MCSystemModel.h - Target System Model --------------*- C++ -*-=======// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes an abstract interface used to get information about a +// target machine's execution engine, including core specifications, memory +// models and other things related to execution resources. +// +// A system model is a collection of execution resources along with a memory +// model. Execution resources may describe things like sockets, cores and +// hardware threads. A memory model describes the cache hierarchy private to +// each execution resource as well as available write-combining buffers and a +// software prefetching configuration. +// +// For example, imagine we're describing a system like this: +// +// 1 Socket +// 48 cores per socket +// 4 threads per core +// 32K per-core L1 cache +// 1M per-core L2 cache +// 32M socket-level L3 cache +// A software prefetcher +// 8 write-combining buffers +// +// Then the system model may look something like this: +// +// System ---------------------. +// | | +// 1 x "Socket" - "L3" 32M <-| +// | 8 x "WCBuf" | +// | | +// 48 x "Core" - "L2" 1M <-| +// | "L1" 32K <-| +// | "Prefetcher" <-' +// | Distance: 800 bytes +// 4 x "Thread" +// +// The pieces to the right of the execution resource make up the memory model. +// +// Note how at the system level we can see the global view of the cache +// hierarchy as well as any software prefetcher configs. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSYSTEMMODEL_H +#define LLVM_MC_MCSYSTEMMODEL_H + +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" + +#include <cassert> + +namespace llvm { + +class MCMemoryModel; + +/// Provide information about write-combining buffers. These are typically used +/// by hardware to buffer stores for efficient data streaming. Each buffer +/// expects a more-or-less linear stream of writes. A write outside the current +/// cache line being filled causes the buffer to flush, so software should not +/// oversubscribe the available hardware resources. If it does, in the worst +/// case buffers will thrash and flush after each write, as each address sent +/// will map to a cache line outside those currently being filled. +class MCWriteCombiningBufferInfo { +private: + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + const int NumBuffers; /// The number of write-combining buffers + const MCMemoryModel *MemModel; /// The associated memory model + + friend class MCMemoryModel; + + /// Set the associated memory model. + void setMemoryModel(const MCMemoryModel *M) { + assert((MemModel == nullptr || MemModel == M) && + "Set write-combining buffers to multiple memory models!"); + MemModel = M; + } + +public: + MCWriteCombiningBufferInfo(unsigned I, + const char *TheName, + int NumBufs) + : ID(I), Name(TheName), NumBuffers(NumBufs), MemModel(nullptr) {} + + virtual ~MCWriteCombiningBufferInfo(); + + /// Return the buffer ID number. + unsigned getID() const { return ID; } + + /// Return the buffer name for debugging. + const char *getName() const { return Name; } + + /// Return the number of available write-combining buffers. + int getNumBuffers() const { return NumBuffers; } + + /// Return the associated memory model. 
+ const MCMemoryModel &getMemoryModel() const { + assert(MemModel && "Memory model not set for write-combining buffer info!"); + return *MemModel; + } +}; + +class MCCacheLevelInfo; + +/// MCSoftwarePrefetcherConfig - Provide information about how to +/// configure the software prefetcher. +class MCSoftwarePrefetcherConfig { +private: + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + const bool EnabledForReads; /// Prefetch loads + const bool EnabledForWrites; /// Prefetch stores + const unsigned BytesAhead; /// Prefetch this many bytes ahead, 0 for heuristics + const unsigned MinBytesAhead; /// Prefetch at least this many bytes ahead + const unsigned MaxBytesAhead; /// Do not prefetch more than this many bytes ahead + const unsigned InstructionsAhead; /// The number of instructions ahead to prefetch + const unsigned MaxIterationsAhead; /// The maximum iterations to prefetch ahead + const unsigned MinByteStride; /// Prefetch only if stride is at least this large + const MCCacheLevelInfo *CacheLevel; /// Cache level targeted by this prefetcher + + friend class MCCacheLevelInfo; + + /// Set the cache level targeted by this prefetcher. 
+ void setTargetCacheLevel(MCCacheLevelInfo *C) { + assert((CacheLevel == nullptr || CacheLevel == C) && + "Set software prefetcher to multiple cache levels!"); + CacheLevel = C; + } + +public: + MCSoftwarePrefetcherConfig(unsigned I, + const char *TheName, + bool EnableForReads, + bool EnableForWrites, + unsigned NumBytesAhead, + unsigned MinNumBytesAhead, + unsigned MaxNumBytesAhead, + unsigned NumInstructionsAhead, + unsigned MaxNumIterationsAhead, + unsigned MinStride) + : ID(I), + Name(TheName), + EnabledForReads(EnableForReads), + EnabledForWrites(EnableForWrites), + BytesAhead(NumBytesAhead), + MinBytesAhead(MinNumBytesAhead), + MaxBytesAhead(MaxNumBytesAhead), + InstructionsAhead(NumInstructionsAhead), + MaxIterationsAhead(MaxNumIterationsAhead), + MinByteStride(MinStride), + CacheLevel(nullptr) {} + + virtual ~MCSoftwarePrefetcherConfig(); + + /// Return the prefetch config ID number. + unsigned getID() const { return ID; } + + /// Return the prefetch config name for debugging. + const char *getName() const { return Name; } + + /// Return whether we should do software prefetching for loads. + bool isEnabledForReads() const { return EnabledForReads; } + + /// Return whether we should do software prefetching for stores. + bool isEnabledForWrites() const { return EnabledForWrites; } + + /// Return the preferred prefetch distance in bytes. A value of 0 + /// tells the software prefetcher to determine distance using + /// heuristics. + unsigned getDistanceInBytes() const { return BytesAhead; } + + /// Never prefetch less than this number of bytes ahead. + unsigned getMinDistanceInBytes() const { return MinBytesAhead; } + + /// Never prefetch more than this number of bytes ahead. + unsigned getMaxDistanceInBytes() const { return MaxBytesAhead; } + + /// Return the preferred prefetch distance in terms of number of + /// instructions. 
+ unsigned getDistanceInInstructions() const { return InstructionsAhead; } + + /// Never prefetch more than this number of loop iterations ahead. + unsigned getMaxDistanceInIterations() const { return MaxIterationsAhead; } + + /// Prefetch only if the byte stride is at least this large. + unsigned getMinByteStride() const { return MinByteStride; } + + /// Return the cache level targeted by this prefetcher. + const MCCacheLevelInfo &getTargetCacheLevel() const { + assert(CacheLevel && "Cache level not set for software prefetcher!"); + return *CacheLevel; + } +}; + +/// Provide information about a specific level in the cache (size, +/// associativity, etc.). +class MCCacheLevelInfo { +private: + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + const unsigned Size; /// Size of cache in bytes + const unsigned LineSize; /// Size of cache line in bytes + const unsigned Ways; /// Number of ways + const unsigned Latency; /// Number of cycles to load + /// Software prefetching config, if there is one + const MCSoftwarePrefetcherConfig *SoftwarePrefetcher; + /// Memory model to which this cache level belongs + const MCMemoryModel *MemModel; + + friend class MCMemoryModel; + + /// Set the memory model to which this cache level belongs. + void setMemoryModel(MCMemoryModel *M) { + assert((MemModel == nullptr || MemModel == M) && + "Set cache level to multiple memory models!"); + MemModel = M; + } + +public: + MCCacheLevelInfo(unsigned I, + const char *TheName, + uint64_t TotalSize, + unsigned TheLineSize, + unsigned NumWays, + unsigned TheLatency, + MCSoftwarePrefetcherConfig *ThePrefetcher = nullptr) + : ID(I), + Name(TheName), + Size(TotalSize), + LineSize(TheLineSize), + Ways(NumWays), + Latency(TheLatency), + SoftwarePrefetcher(ThePrefetcher), + MemModel(nullptr) { + if (ThePrefetcher) + ThePrefetcher->setTargetCacheLevel(this); + } + + virtual ~MCCacheLevelInfo(); + + /// Return the cache level ID number. 
+ unsigned getID() const { return ID; } + + /// Return the cache level name for debugging. + const char *getName() const { return Name; } + + /// Return the total size of the cache level in bytes. + uint64_t getSizeInBytes() const { return Size; } + + /// Return the size of the cache line in bytes. + unsigned getLineSizeInBytes() const { return LineSize; } + + /// Return the number of ways. + unsigned getAssociativity() const { return Ways; } + + /// Return the latency of a load in clocks. + unsigned getLatency() const { return Latency; } + + /// Return whether a software prefetcher targets this cache level. + bool hasSoftwarePrefetcher() const { + return SoftwarePrefetcher != nullptr; + } + + /// Return the software prefetcher targeting this cache level, or empty if no + /// such software prefetcher exists. + const MCSoftwarePrefetcherConfig &getSoftwarePrefetcher() const { + assert(hasSoftwarePrefetcher() && "No software prefetcher to get!"); + return *SoftwarePrefetcher; + } + + /// Return the memory model to which this cache level belongs. + const MCMemoryModel &getMemoryModel() const { + assert(MemModel && "Memory model not set for cache level info!"); + return *MemModel; + } +}; + +class MCExecutionResource; + +/// Aggregate some number of cache levels together along with a software +/// prefetching configuration and write-combining buffer information into a +/// model of the memory system as viewed from a particular execution resource. +/// For example, a core may have L1 and L2 caches private to it, while a socket +/// may have an L3 shared by all cores contained by the socket. The core memory +/// model will list L1 and L2 and the socket memory model will list L3. 
+class MCMemoryModel { +public: + typedef const MCCacheLevelInfo *cachelevel_iterator; + +private: + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + + MCCacheLevelInfo *Levels; /// Array of cache levels + unsigned NumLevels; /// Number of cache levels + /// Write-combining buffer information if there is any + const MCWriteCombiningBufferInfo *WCBuffers; + /// Descriptor for the execution resource to which this memory model belongs + const MCExecutionResource *Resource; + + friend class MCExecutionResource; + + /// Set the execution resource descriptor for this memory model. + void setExecutionResource(const MCExecutionResource *R) { + assert((Resource == nullptr || Resource == R) && + "Set memory model to multiple execution resources!"); + Resource = R; + } + +public: + MCMemoryModel(unsigned I, + const char *TheName, + MCCacheLevelInfo *CacheLevels, + unsigned NumCacheLevels, + MCWriteCombiningBufferInfo *WCBufs = nullptr) + : ID(I), + Name(TheName), + Levels(CacheLevels), + NumLevels(NumCacheLevels), + WCBuffers(WCBufs), + Resource(nullptr) { + for (unsigned i = 0; i < getNumCacheLevels(); ++i) + CacheLevels[i].setMemoryModel(this); + + if (WCBufs) + WCBufs->setMemoryModel(this); + } + + virtual ~MCMemoryModel(); + + /// Return the memory model ID number. + unsigned getID() const { return ID; } + + /// Return the memory model name for debugging. + const char *getName() const { return Name; } + + /// Return the associated execution resource descriptor. + const MCExecutionResource &getExecutionResource() const { + assert(Resource && "Execution resource not set for memory model!"); + return *Resource; + } + + //===--------------------------------------------------------------------===// + /// Cache Level Information + + /// Index the hierarchy for a cache level. Note that this is a piece of the + /// global cache hierarchy private to the execution resource using the memory + /// model, and shared by any contained execution resources. 
As such "level 0" + /// (or level 1, etc.) has no correspondence to a global-view cache level. + /// Thus names like "L1" aren't very useful. + const MCCacheLevelInfo &getCacheLevel(unsigned Level) const { + assert(Level < NumLevels && + "Attempting to access record for invalid cache level!"); + return Levels[Level]; + } + + /// Return the number of cache levels. + unsigned getNumCacheLevels() const { + return NumLevels; + } + + /// Cache level iterators + cachelevel_iterator begin() const { return Levels; } + cachelevel_iterator end() const { + return Levels + getNumCacheLevels(); + } + + //===--------------------------------------------------------------------===// + /// Write Combining Buffer Information + + /// Return whether this memory model has write-combining buffers. + bool hasWriteCombiningBuffers() const { + return WCBuffers != nullptr; + } + + + /// Return the write combining buffer info. + const MCWriteCombiningBufferInfo &getWCBufferInfo() const { + assert(hasWriteCombiningBuffers() && "No write-combining buffers to get!"); + return *WCBuffers; + } +}; + +class MCExecutionResource; +class MCSystemModel; + +/// Provide information about the number of execution resources of a given type +/// that are contained within an execution resource. For example at the socket level +/// there may be a core resource descriptor specifying that the socket has 48 +/// cores. +/// +/// MCExecutionResourceDesc exists so that we don't need to instantiate multiple +/// independent, equivalent MCExecutionResources. If we're modeling a socket +/// with 48 cores each having four threads, we would not want to instantiate 192 +/// entirely equivalent objects to describe the threads. Instead we instantiate +/// one MCExecutionResourceDesc describing 48 cores containing one +/// MCExecutionResourceDesc describing four threads. 
+class MCExecutionResourceDesc { + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + const MCExecutionResource *Resource; /// The described resource + unsigned NumResources; /// The resource count + + using ParentType = PointerUnion; + ParentType Parent; /// The containing resource or system model + + friend class MCExecutionResource; + + void setParentResource(MCExecutionResource *R) { + assert((!Parent || Parent.getOpaqueValue() == R) && + "Execution resource descriptor set to multiple parents"); + Parent = R; + } + + friend class MCSystemModel; + + void setParentSystemModel(MCSystemModel *S) { + assert((!Parent || Parent.getOpaqueValue() == S) && + "Execution resource descriptor set to multiple parents"); + Parent = S; + } + +public: + MCExecutionResourceDesc(unsigned I, + const char *TheName, + MCExecutionResource *R, + unsigned N); + + /// Return the resource descriptor ID number. + unsigned getID() const { return ID; } + + /// Return the resource descriptor name for debugging. + const char *getName() const { return Name; } + + /// Get the resource. + const MCExecutionResource &getResource() const { + return *Resource; + } + + /// Get the number of resources represented by this descriptor. + unsigned getNumResources() const { + return NumResources; + } + + bool parentIsExecutionResource() const { + return Parent.is(); + } + + bool parentIsSystemModel() const { + return Parent.is(); + } + + const MCExecutionResource &getParentExecutionResource() const { + return *Parent.get(); + } + + const MCSystemModel &getParentSystemModel() const { + return *Parent.get(); + } +}; + +/// Provide information about a specific kind of execution resource (core, +/// thread, etc.). A resource contains a memory model describing the pieces of +/// the memory heirarchy private to it. For example a core may contain two +/// levels of private cache and the socket containing the cores may contain one +/// level of cache private to it. 
+/// +/// An MCExecutionResource may contain MCExecutionResourceDescs describing child +/// resources. For example, a core may contain a descriptor describing four +/// hardware threads. +class MCExecutionResource { + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + + /// An array of execution resource desciptors, allowing an execution resource + /// to contain a variety of resources; for example a socket containing some + /// number of big cores and some number of little cores + const MCExecutionResourceDesc *const *Contained; + + /// The number of unique contained execution resource types + unsigned NumContained; + + /// The memory model for this execution resource if there is one + const MCMemoryModel *MemoryModel; + + /// The descriptor associated with this resource. + const MCExecutionResourceDesc *Descriptor; + + friend class MCExecutionResourceDesc; + + /// Set the descriptor associated with this resource. + void setDescriptor(const MCExecutionResourceDesc *D) { + assert((Descriptor == nullptr || Descriptor == D) && + "Set multiple descriptors for resrouce!"); + Descriptor = D; + } + +public: + + using resource_iterator = + pointee_iterator; + + MCExecutionResource(unsigned I, + const char *TheName, + MCExecutionResourceDesc *const *C, + unsigned NC, + MCMemoryModel *M = nullptr) + : ID(I), + Name(TheName), + Contained(C), + NumContained(NC), + MemoryModel(M), + Descriptor(nullptr) { + for (unsigned i = 0; i < getNumContainedExecutionResourceTypes(); ++i) + C[i]->setParentResource(this); + if (M != nullptr) { + M->setExecutionResource(this); + } + } + + virtual ~MCExecutionResource(); /// Allow subclasses + + /// Return the resource ID number. + unsigned getID() const { return ID; } + + /// Return the resource name for debugging. + const char *getName() const { return Name; } + + bool hasMemoryModel() const { + return MemoryModel != nullptr; + } + + /// Return the memory model for this resource. 
+ const MCMemoryModel &getMemoryModel() const { + assert(hasMemoryModel() && "No memory model!"); + return *MemoryModel; + } + + /// Return the number of unique execution resource types contained within this + /// one. + unsigned getNumContainedExecutionResourceTypes() const { + return NumContained; + } + + /// Iterate over unique contained resources. + resource_iterator begin() const { + return resource_iterator(Contained); + } + resource_iterator end() const { + return resource_iterator(Contained + NumContained); + } + + /// Get the resource descriptor indexed by the given value. + const MCExecutionResourceDesc & + getContainedResourceDescriptor(unsigned Index) const { + assert(Index < getNumContainedExecutionResourceTypes() && + "Overindexing resource descriptors!"); + return *Contained[Index]; + } + + /// Get the descriptor for this resource. + const MCExecutionResourceDesc &getResourceDescriptor() const { + assert(Descriptor != nullptr && "No resource descriptor!"); + return *Descriptor; + } +}; + +/// Model a collection of execution resources coupled with other information. +/// This also aggregates information about the resource memory models, +/// presenting a global system view of memory characteristics. +/// +/// An MCSystemModel is simply a collection of MCExecutionResourceDescs along +/// with a cache model and software prefetcher configuration. The +/// MCExecutionResourceDescs describe hardware execution resources, such as +/// sockets, cores and threads. What exactly an "execution resource" means is +/// entirely up to the target. +class MCSystemModel { + unsigned ID; /// A unique ID + const char *Name; /// A name for debugging + + /// An array of execution resource descriptor pointers + const MCExecutionResourceDesc *const *Resources; + unsigned NumResources; /// Number of entries in the array + + /// A default system model for targets that don't define one. 
+ static const MCSystemModel Default; + +public: + /// Caches of information about execution resources and their memory models. + + /// Make the cache topology indexable by level. The bottom-most + /// cache level of each resource makes up level zero. For example: + // + /// Thread + /// | + /// Big Core Little Core + /// \/ + /// Socket + // + /// If the big core has an L1 and L2 cache, the little core has an L1 + /// cache and the socket has an L3 cache, the big and little L1s go + /// into the L1 set, the big core L2 goes into the L2 set and the + /// socket L3 goes into the L3 set. + + using CacheLevelList = SmallVector; + +private: + using CacheLevelInfo = SmallVector; + CacheLevelInfo CacheLevels; ///The system cache topology + + /// Cache information about caches. This makes accessing cache information + /// faster. + void initCacheInfoCache(); + +public: + using PrefetchConfigList = SmallVector; + +private: + PrefetchConfigList Prefetchers; /// A list of all software prefetchers + + /// Cache information about prefetchers. This makes accessing prefetcher + /// information faster. + void initPrefetchConfigCache(); + +public: + /// Convenience values for indexing the global-view cache hierarchy. + enum class CacheLevel { + L1 = 0, + L2, + L3, + L4 + }; + + using resource_iterator = + pointee_iterator; + + MCSystemModel(unsigned I, + const char *TheName, + MCExecutionResourceDesc *const *R, + unsigned NR) + : ID(I), Name(TheName), Resources(R), NumResources(NR) { + for (unsigned i = 0; i < getNumExecutionResourceTypes(); ++i) + R[i]->setParentSystemModel(this); + + initCacheInfoCache(); + initPrefetchConfigCache(); + } + + virtual ~MCSystemModel(); + + /// Return the default initialized model. + static const MCSystemModel &getDefaultSystemModel() { + return Default; + } + + /// Return the execution engine ID number. + unsigned getID() const { return ID; } + + /// Return the execution engine name for debugging. 
+ const char *getName() const { return Name; } + + /// Return the number of unique execution resource types. + unsigned getNumExecutionResourceTypes() const { + return NumResources; + } + + /// Iterate over top-level execution resources. + resource_iterator begin() const { + return resource_iterator(Resources); + } + resource_iterator end() const { + return resource_iterator(Resources + NumResources); + } + + /// Get the resource descriptor indexed by the given value. + const MCExecutionResourceDesc &getResourceDescriptor(unsigned Index) const { + assert(Index < getNumExecutionResourceTypes() && + "Overindexing resource descriptors!"); + return *Resources[Index]; + } + + /// Return the number of cache levels in the global hierarchy. + unsigned getNumCacheLevels() const { + return CacheLevels.size(); + } + + /// Retrieve cached information about cache levels. + const CacheLevelList &getCacheLevelInfo(CacheLevel Level) const { + assert(static_cast(Level) < getNumCacheLevels() && + "Overindexing cache levels!"); + + return CacheLevels[static_cast(Level)]; + } + + const CacheLevelList &getCacheLevelInfo(unsigned Level) const { + assert(Level < getNumCacheLevels() && + "Overindexing cache levels!"); + + return CacheLevels[Level]; + } + + /// Retrieve cached information about prefetchers. 
+ const PrefetchConfigList &getSoftwarePrefetcherInfo() const { + return Prefetchers; + } +}; + +} // End llvm namespace + +#endif diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -43,6 +43,7 @@ MCSubtargetInfo.cpp MCSymbol.cpp MCSymbolELF.cpp + MCSystemModel.cpp MCTargetOptions.cpp MCValue.cpp MCWasmObjectTargetWriter.cpp diff --git a/llvm/lib/MC/MCSystemModel.cpp b/llvm/lib/MC/MCSystemModel.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MC/MCSystemModel.cpp @@ -0,0 +1,137 @@ +//=== MC/MCSystemModel.cpp - Target System Model ------------*- C++ -*-=======// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines MCSystemModel methods. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSystemModel.h" +#include <functional> + +namespace llvm { + +MCWriteCombiningBufferInfo::~MCWriteCombiningBufferInfo() {} +MCSoftwarePrefetcherConfig::~MCSoftwarePrefetcherConfig() {} +MCCacheLevelInfo::~MCCacheLevelInfo() {} +MCMemoryModel::~MCMemoryModel() {} + +MCExecutionResourceDesc::MCExecutionResourceDesc(unsigned I, + const char *TheName, + MCExecutionResource *R, + unsigned N) + : ID(I), Name(TheName), Resource(R), NumResources(N) { + R->setDescriptor(this); +} + +MCExecutionResource::~MCExecutionResource() {} +MCSystemModel::~MCSystemModel() {} + +const MCSystemModel MCSystemModel::Default(0, + "Default System Model", + nullptr, + 0); + + +void MCSystemModel::initCacheInfoCache() { + // Create a global cache topology. The tricky part is collapsing + // the execution resource levels properly. 
For example, let's say + // we have a system with a CPU socket and a GPU socket. The CPU + // socket contains two core types: big and little. The CPUsocket + // contains an L3 cache, the big core contains and L2 and L1 cache + // and the little core contains an L1 cache. The GPU socket + // contains shared L2 and L3 caches and GPU cores have a private L1 + // cache: + // + // System + // / \ + // (L2, L3) GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + // + // We want the final topology to look like this: + // + // L3 (GPU) L3 (CPU) + // L2 (GPU) L2 (B) + // L1 (C) L1 (L) L1(B) + // + // The algorithm below recursively determines the topology for the + // resources below the current one, then merges the lists from all + // child resources so that it is no longer than the maximum size of + // any child list. Child lists are ordered by cache level, so the + // lowest level of cache appears first. + // + auto mergeCacheInfo = [](const CacheLevelInfo &I1, + const CacheLevelInfo &I2) -> CacheLevelInfo { + CacheLevelInfo Result; + unsigned Length = std::max(I1.size(), I2.size()); + for (unsigned i = 0; i < Length; ++i) { + Result.push_back(CacheLevelList()); + if (i < I1.size()) + Result.back().append(I1[i].begin(), I1[i].end()); + if (i < I2.size()) + Result.back().append(I2[i].begin(), I2[i].end()); + } + + return Result; + }; + + std::function getCacheInfo = + [&](const MCExecutionResourceDesc &Desc) -> CacheLevelInfo { + const MCExecutionResource &Resource = Desc.getResource(); + + CacheLevelInfo Result; + for (const MCExecutionResourceDesc &ContainedDesc : Resource) + Result = mergeCacheInfo(Result, getCacheInfo(ContainedDesc)); + + // Add cache information for this resource. 
+ if (Resource.hasMemoryModel()) + for (const MCCacheLevelInfo &Level : Resource.getMemoryModel()) { + Result.push_back(CacheLevelList()); + Result.back().push_back(&Level); + } + + return Result; + }; + + CacheLevels.clear(); + + for (const MCExecutionResourceDesc &Desc : *this) + CacheLevels = mergeCacheInfo(CacheLevels, getCacheInfo(Desc)); +} + +void MCSystemModel::initPrefetchConfigCache() { + Prefetchers.clear(); + + using WorkListType = SmallVector; + WorkListType WorkList; + for (const MCExecutionResourceDesc &ResourceDesc : *this) + WorkList.push_back(&ResourceDesc); + + while (!WorkList.empty()) { + const MCExecutionResourceDesc *Item = WorkList.back(); + WorkList.pop_back(); + const MCExecutionResource &Resource = Item->getResource(); + if (Resource.hasMemoryModel()) { + const MCMemoryModel &MemModel = Resource.getMemoryModel(); + for (const MCCacheLevelInfo &CacheLevel : MemModel) { + if (CacheLevel.hasSoftwarePrefetcher()) { + const MCSoftwarePrefetcherConfig &Prefetcher = + CacheLevel.getSoftwarePrefetcher(); + if (Prefetcher.isEnabledForReads() || Prefetcher.isEnabledForWrites()) + Prefetchers.push_back(&Prefetcher); + } + } + } + for (const auto &ResourceDesc : Resource) + WorkList.push_back(&ResourceDesc); + } +} + +} // end llvm namespace diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt --- a/llvm/unittests/MC/CMakeLists.txt +++ b/llvm/unittests/MC/CMakeLists.txt @@ -10,5 +10,6 @@ DwarfLineTables.cpp MCInstPrinter.cpp StringTableBuilderTest.cpp + SystemModel.cpp TargetRegistry.cpp ) diff --git a/llvm/unittests/MC/SystemModel.cpp b/llvm/unittests/MC/SystemModel.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/MC/SystemModel.cpp @@ -0,0 +1,1426 @@ +//===- unittests/MC/SystemModel.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSystemModel.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(SystemModel, Topology1Tests) { + // Test this topology: + // + // System + // / \ + // (L2, L3) GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + const unsigned GPUCoreL1 = 0; + + const unsigned CPUL3 = 0; + + const unsigned GPUL2 = 0; + const unsigned GPUL3 = 1; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. + const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCoreCacheLevelNames[] = { "GPUCoreL1" }; + unsigned GPUCoreCacheLevelSizes[] = { 1024*32 }; + unsigned GPUCoreCacheLevelLineSizes[] = { 64 }; + unsigned GPUCoreCacheLevelAssociativities[] = { 8 }; + unsigned GPUCoreCacheLevelLatencies[] = { 2 }; + + const char *GPUCacheLevelNames[] = { "GPUL2", "GPUL3" }; + unsigned GPUCacheLevelSizes[] = { 1024*64, 1024*1024*2 }; + unsigned 
GPUCacheLevelLineSizes[] = { 64, 64 }; + unsigned GPUCacheLevelAssociativities[] = { 24, 32 }; + unsigned GPUCacheLevelLatencies[] = { 12, 50 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. + // The GPU has four cores with two thread team schedulers of vector + // length 64, for a total of 512 "threads." + const char *CPUCoreNames[] = { "BigCore", "LittleCore" }; + unsigned CPUCoreCounts[] = { 2, 8 }; + unsigned CPUThreadCounts[] = { 4, 2 }; + + const char *GPUCoreNames[] = { "GPUCore" }; + unsigned GPUCoreCounts[] = { 4 }; + // Threads in a core. The GPU has two thread team schedulers, each + // team may be a vector length of, say, 64 which we don't model. + unsigned GPUThreadCounts[] = { 2 }; + + // Define socket parameters. + const char *SocketNames[] = { "CPU", "GPU" }; + unsigned CoreTypeCounts[] = { 2, 1 }; + + unsigned ID = 0; + + // Define write-combining buffers. + MCWriteCombiningBufferInfo BigWCBufs(ID++, "BigWCBufs", 8); + MCWriteCombiningBufferInfo LittleWCBufs(ID++, "LittleWCBufs", 4); + MCWriteCombiningBufferInfo NoWCBufs(ID++, "NoWCBufs", 0); + + // Define software prefetchers. + MCSoftwarePrefetcherConfig BigPrefetcher(ID++, "Big L1 Prefetcher", true, true, 1024, + 512, 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig LittlePrefetcher(ID++, "Little L1 Prefetcher", true, true, 1024, + 512, 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig NoPrefetcher(ID++, "NoPrefetcher", false, false, 0, + 0, 0, 0, 0, 0); + + // Define caches. 
+ MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1], + &BigPrefetcher), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1], + &LittlePrefetcher), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // Each GPU core has a small cache. + MCCacheLevelInfo GPUCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCoreCacheLevelNames[GPUCoreL1], + GPUCoreCacheLevelSizes[GPUCoreL1], + GPUCoreCacheLevelLineSizes[GPUCoreL1], + GPUCoreCacheLevelAssociativities[GPUCoreL1], + GPUCoreCacheLevelLatencies[GPUCoreL1]), + }; + + // All GPU cores share two higher levels of cache. + MCCacheLevelInfo GPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL2], + GPUCacheLevelSizes[GPUL2], + GPUCacheLevelLineSizes[GPUL2], + GPUCacheLevelAssociativities[GPUL2], + GPUCacheLevelLatencies[GPUL2]), + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL3], + GPUCacheLevelSizes[GPUL3], + GPUCacheLevelLineSizes[GPUL3], + GPUCacheLevelAssociativities[GPUL3], + GPUCacheLevelLatencies[GPUL3]), + }; + + // Define memory models. 
+  // Define memory models. Each takes (ID, name, cache-level array,
+  // number of levels, optional write-combining buffer info); the level
+  // count matches the array defined above.
+  MCMemoryModel BigMemModel(ID++,
+                            "BigMemModel",
+                            BigCoreCacheLevels,
+                            2,
+                            &BigWCBufs);
+
+  MCMemoryModel LittleMemModel(ID++,
+                               "LittleMemModel",
+                               LittleCoreCacheLevels,
+                               1,
+                               &LittleWCBufs);
+
+  // The socket-level and GPU models omit the write-combining buffer
+  // argument entirely.
+  MCMemoryModel CPUMemModel(ID++,
+                            "CPUMemModel",
+                            CPUCacheLevels,
+                            1);
+
+  MCMemoryModel GPUCoreMemModel(ID++,
+                                "GPUCoreMemModel",
+                                GPUCoreCacheLevels,
+                                1);
+
+  MCMemoryModel GPUMemModel(ID++,
+                            "GPUMemModel",
+                            GPUCacheLevels,
+                            2);
+
+  // Define threads. Threads are leaf resources: no contained resource
+  // list (nullptr, 0) and no memory model. All three deliberately share
+  // the same name string; the traversal checks below compare against
+  // ThreadName with EXPECT_STREQ.
+  MCExecutionResource BigThread(ID++, ThreadName, nullptr, 0);
+  MCExecutionResource LittleThread(ID++, ThreadName, nullptr, 0);
+  MCExecutionResource GPUThread(ID++, ThreadName, nullptr, 0);
+
+  // Define cores. A descriptor pairs one resource prototype with the
+  // number of instances of it the parent contains.
+  MCExecutionResourceDesc BigThreadDesc(ID++,
+                                        "BigThreadDesc",
+                                        &BigThread,
+                                        CPUThreadCounts[Big]);
+  MCExecutionResourceDesc LittleThreadDesc(ID++,
+                                           "LittleThreadDesc",
+                                           &LittleThread,
+                                           CPUThreadCounts[Little]);
+  MCExecutionResourceDesc GPUThreadDesc(ID++,
+                                        "GPUThreadDesc",
+                                        &GPUThread,
+                                        GPUThreadCounts[GPU]);
+
+  MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc };
+  MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc };
+  MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc };
+
+  // Each core contains exactly one thread type and carries its private
+  // (core-level) memory model.
+  MCExecutionResource BigCore(ID++,
+                              CPUCoreNames[Big],
+                              BigThreadsList,
+                              1,
+                              &BigMemModel);
+
+  MCExecutionResource LittleCore(ID++,
+                                 CPUCoreNames[Little],
+                                 LittleThreadsList,
+                                 1,
+                                 &LittleMemModel);
+
+  MCExecutionResource GPUCore(ID++,
+                              GPUCoreNames[GPU],
+                              GPUThreadsList,
+                              1,
+                              &GPUCoreMemModel);
+
+  // Define sockets.
+ MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + &CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1, + &GPUMemModel); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. + EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u); + + unsigned s = 0; + for (const auto &SocketDesc : Node) { + EXPECT_EQ(&SocketDesc.getParentSystemModel(), &Node); + EXPECT_EQ(SocketDesc.getNumResources(), 1u); + + const auto &Socket = SocketDesc.getResource(); + EXPECT_EQ(&Socket.getResourceDescriptor(), &SocketDesc); + EXPECT_STREQ(Socket.getName(), SocketNames[s]); + EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(), + CoreTypeCounts[s]); + + unsigned *CoreCounts = (s == CPUSocket ? + CPUCoreCounts : GPUCoreCounts); + const char *const *CoreNames = (s == CPUSocket ? + CPUCoreNames : GPUCoreNames); + unsigned *ThreadCounts = (s == CPUSocket ? 
+ CPUThreadCounts : GPUThreadCounts); + + unsigned c = 0; + for (const auto &CoreDesc : Socket) { + EXPECT_EQ(&CoreDesc.getParentExecutionResource(), &Socket); + EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]); + + const auto &Core = CoreDesc.getResource(); + EXPECT_EQ(&Core.getResourceDescriptor(), &CoreDesc); + EXPECT_STREQ(Core.getName(), CoreNames[c]); + + EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u); + + const auto &ThreadDesc = Core.getContainedResourceDescriptor(0); + EXPECT_EQ(&ThreadDesc.getParentExecutionResource(), &Core); + EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]); + + const auto &Thread = ThreadDesc.getResource(); + EXPECT_EQ(&Thread.getResourceDescriptor(), &ThreadDesc); + EXPECT_STREQ(Thread.getName(), ThreadName); + + EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u); + + // Check core-level caches. + const char *const *CacheNames = (s == CPUSocket && + c == Big ? + BigCacheLevelNames : + (s == CPUSocket && + c == Little ? + LittleCacheLevelNames : + GPUCoreCacheLevelNames)); + const unsigned *CacheSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelSizes : + GPUCoreCacheLevelSizes)); + const unsigned *CacheLineSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelLineSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes)); + const unsigned *CacheAssociativities = (s == CPUSocket && + c == Big ? + BigCacheLevelAssociativities : + (s == CPUSocket && + c == Little ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities) + ); + const unsigned *CacheLatencies = (s == CPUSocket && + c == Big ? + BigCacheLevelLatencies : + (s == CPUSocket && + c == Little ? 
+ LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies)); + + EXPECT_EQ(&Core.getMemoryModel().getExecutionResource(), &Core); + + unsigned lvl = 0; + for (const auto &CacheLevel : Core.getMemoryModel()) { + EXPECT_EQ(&CacheLevel.getMemoryModel(), &Core.getMemoryModel()); + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + if (CacheLevel.getName() == StringRef("BigL1")) { + const MCSoftwarePrefetcherConfig &Prefetcher = + CacheLevel.getSoftwarePrefetcher(); + EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel); + EXPECT_STREQ(Prefetcher.getName(), "Big L1 Prefetcher"); + } + else if (CacheLevel.getName() == StringRef("LittleL1")) { + const MCSoftwarePrefetcherConfig &Prefetcher = + CacheLevel.getSoftwarePrefetcher(); + EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel); + EXPECT_STREQ(Prefetcher.getName(), "Little L1 Prefetcher"); + } + + ++lvl; + } + + ++c; + } + + // Check socket-level caches. + const char *const *CacheNames = (s == CPUSocket ? + CPUCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (s == CPUSocket ? + CPUCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (s == CPUSocket ? + CPUCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (s == CPUSocket ? + CPUCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (s == CPUSocket ? 
+ CPUCacheLevelLatencies : + GPUCacheLevelLatencies); + + EXPECT_EQ(&Socket.getMemoryModel().getExecutionResource(), &Socket); + + unsigned lvl = 0; + for (const auto &CacheLevel : Socket.getMemoryModel()) { + EXPECT_EQ(&CacheLevel.getMemoryModel(), &Socket.getMemoryModel()); + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++s; + } + + // Test the global system representation of the memory model. + const MCSystemModel::CacheLevelList &L1Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1); + const MCSystemModel::CacheLevelList &L2Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2); + const MCSystemModel::CacheLevelList &L3Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3); + + const MCSystemModel::PrefetchConfigList &PrefetchConfigs = + Node.getSoftwarePrefetcherInfo(); + + EXPECT_EQ(L1Levels.size(), 3u); + EXPECT_EQ(L2Levels.size(), 2u); + EXPECT_EQ(L3Levels.size(), 2u); + EXPECT_EQ(PrefetchConfigs.size(), 2u); + + unsigned i = 0; + for (const auto L1Level : L1Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + i == 1 ? LittleCacheLevelNames : + GPUCoreCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + i == 1 ? LittleCacheLevelSizes : + GPUCoreCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + i == 1 ? LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes + ); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + i == 1 ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + i == 1 ? 
LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies + ); + + unsigned Index = (i == 0 ? BigL1 : + i == 1 ? LittleL1 : GPUCoreL1); + + EXPECT_STREQ(L1Level->getName(), CacheNames[Index]); + EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L2Level : L2Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned Index = (i == 0 ? BigL2 : GPUL2); + + EXPECT_STREQ(L2Level->getName(), CacheNames[Index]); + EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L3Level : L3Levels) { + const char *const *CacheNames = (i == 0 ? CPUCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? CPUCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? CPUCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (i == 0 ? + CPUCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? CPUCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned Index = (i == 0 ? 
CPUL3 : GPUL3); + + EXPECT_STREQ(L3Level->getName(), CacheNames[Index]); + EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]); + + ++i; + } +} + +TEST(SystemModel, Topology2Tests) { + // Test this topology: + // + // System + // / \ + // GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + const unsigned GPUCoreL1 = 0; + + const unsigned CPUL3 = 0; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. + const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCoreCacheLevelNames[] = { "GPUCoreL1" }; + unsigned GPUCoreCacheLevelSizes[] = { 1024*32 }; + unsigned GPUCoreCacheLevelLineSizes[] = { 64 }; + unsigned GPUCoreCacheLevelAssociativities[] = { 8 }; + unsigned GPUCoreCacheLevelLatencies[] = { 2 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. 
+  // The GPU has four cores with two thread team schedulers of vector
+  // length 64, for a total of 512 "threads."
+  const char *CPUCoreNames[] = { "BigCore", "LittleCore" };
+  unsigned CPUCoreCounts[] = { 2, 8 };
+  unsigned CPUThreadCounts[] = { 4, 2 };
+
+  const char *GPUCoreNames[] = { "GPUCore" };
+  unsigned GPUCoreCounts[] = { 4 };
+  // Threads in a core. The GPU has two thread team schedulers, each
+  // team may be a vector length of, say, 64 which we don't model.
+  unsigned GPUThreadCounts[] = { 2 };
+
+  // Define socket parameters.
+  const char *SocketNames[] = { "CPU", "GPU" };
+  unsigned CoreTypeCounts[] = { 2, 1 };
+
+  // Running counter handing every MC* object built in this test a
+  // unique ID.
+  unsigned ID = 0;
+
+  // Define write-combining buffers: (ID, debug name, buffer count).
+  // Unlike Topology1, this topology has no "disabled" buffer config.
+  MCWriteCombiningBufferInfo BigWCBufs(ID++, "BigWCBufs", 8);
+
+  MCWriteCombiningBufferInfo LittleWCBufs(ID++, "LittleWCBufs", 4);
+
+  // Define software prefetchers. Arguments after (ID, name) are enable
+  // flags and distance/size tuning knobs whose exact meanings are
+  // defined by MCSoftwarePrefetcherConfig, not visible here.
+  MCSoftwarePrefetcherConfig BigPrefetcher(ID++, "Big L1 Prefetcher", true,
+                                           true, 1024, 512, 4096, 100, 4, 32);
+
+  MCSoftwarePrefetcherConfig LittlePrefetcher(ID++, "Little L1 Prefetcher",
+                                              true, true, 1024, 512, 4096, 100,
+                                              4, 32);
+
+  // Define caches.
+ MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1], + &BigPrefetcher), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1], + &LittlePrefetcher), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // Each GPU core has a small cache. + MCCacheLevelInfo GPUCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCoreCacheLevelNames[GPUCoreL1], + GPUCoreCacheLevelSizes[GPUCoreL1], + GPUCoreCacheLevelLineSizes[GPUCoreL1], + GPUCoreCacheLevelAssociativities[GPUCoreL1], + GPUCoreCacheLevelLatencies[GPUCoreL1]), + }; + + // Define memory models. + MCMemoryModel BigMemModel(ID++, + "BigMemModel", + BigCoreCacheLevels, + 2, + &BigWCBufs); + + MCMemoryModel LittleMemModel(ID++, + "LittleMemModel", + LittleCoreCacheLevels, + 1, + &LittleWCBufs); + + MCMemoryModel CPUMemModel(ID++, + "CPUMemModel", + CPUCacheLevels, + 1); + + MCMemoryModel GPUCoreMemModel(ID++, + "GPUCoreMemModel", + GPUCoreCacheLevels, + 1); + + // Define threads. + MCExecutionResource BigThread(ID++, ThreadName, nullptr, 0); + MCExecutionResource LittleThread(ID++, ThreadName, nullptr, 0); + MCExecutionResource GPUThread(ID++, ThreadName, nullptr, 0); + + // Define cores. 
+ MCExecutionResourceDesc BigThreadDesc(ID++, + "BigThreadDesc", + &BigThread, + CPUThreadCounts[Big]); + MCExecutionResourceDesc LittleThreadDesc(ID++, + "LittleThreadDesc", + &LittleThread, + CPUThreadCounts[Little]); + MCExecutionResourceDesc GPUThreadDesc(ID++, + "GPUThreadDesc", + &GPUThread, + GPUThreadCounts[GPU]); + + MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc }; + MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc }; + MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc }; + + MCExecutionResource BigCore(ID++, + CPUCoreNames[Big], + BigThreadsList, + 1, + &BigMemModel); + + MCExecutionResource LittleCore(ID++, + CPUCoreNames[Little], + LittleThreadsList, + 1, + &LittleMemModel); + + MCExecutionResource GPUCore(ID++, + GPUCoreNames[GPU], + GPUThreadsList, + 1, + &GPUCoreMemModel); + + // Define sockets. + MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + &CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. 
+ EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u); + + unsigned s = 0; + for (const auto &SocketDesc : Node) { + EXPECT_EQ(&SocketDesc.getParentSystemModel(), &Node); + EXPECT_EQ(SocketDesc.getNumResources(), 1u); + + const auto &Socket = SocketDesc.getResource(); + EXPECT_EQ(&Socket.getResourceDescriptor(), &SocketDesc); + EXPECT_STREQ(Socket.getName(), SocketNames[s]); + EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(), + CoreTypeCounts[s]); + + unsigned *CoreCounts = (s == 0 ? CPUCoreCounts : GPUCoreCounts); + const char *const *CoreNames = (s == 0 ? CPUCoreNames : GPUCoreNames); + unsigned *ThreadCounts = (s == 0 ? CPUThreadCounts : GPUThreadCounts); + + unsigned c = 0; + for (const auto &CoreDesc : Socket) { + EXPECT_EQ(&CoreDesc.getParentExecutionResource(), &Socket); + EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]); + + const auto &Core = CoreDesc.getResource(); + EXPECT_EQ(&Core.getResourceDescriptor(), &CoreDesc); + EXPECT_STREQ(Core.getName(), CoreNames[c]); + + EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u); + + const auto &ThreadDesc = Core.getContainedResourceDescriptor(0); + EXPECT_EQ(&ThreadDesc.getParentExecutionResource(), &Core); + EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]); + + const auto &Thread = ThreadDesc.getResource(); + EXPECT_EQ(&Thread.getResourceDescriptor(), &ThreadDesc); + EXPECT_STREQ(Thread.getName(), ThreadName); + + EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u); + + // Check core-level caches. + const char *const *CacheNames = (s == CPUSocket && + c == Big ? + BigCacheLevelNames : + (s == CPUSocket && + c == Little ? + LittleCacheLevelNames : + GPUCoreCacheLevelNames)); + const unsigned *CacheSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelSizes : + GPUCoreCacheLevelSizes)); + const unsigned *CacheLineSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelLineSizes : + (s == CPUSocket && + c == Little ? 
+ LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes)); + const unsigned *CacheAssociativities = (s == CPUSocket && + c == Big ? + BigCacheLevelAssociativities : + (s == CPUSocket && + c == Little ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities) + ); + const unsigned *CacheLatencies = (s == CPUSocket && + c == Big ? + BigCacheLevelLatencies : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies)); + + EXPECT_EQ(&Core.getMemoryModel().getExecutionResource(), &Core); + + unsigned lvl = 0; + for (const auto &CacheLevel : Core.getMemoryModel()) { + EXPECT_EQ(&CacheLevel.getMemoryModel(), &Core.getMemoryModel()); + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + if (CacheLevel.getName() == StringRef("BigL1")) { + const MCSoftwarePrefetcherConfig &Prefetcher = + CacheLevel.getSoftwarePrefetcher(); + EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel); + EXPECT_STREQ(Prefetcher.getName(), "Big L1 Prefetcher"); + } + else if (CacheLevel.getName() == StringRef("LittleL1")) { + const MCSoftwarePrefetcherConfig &Prefetcher = + CacheLevel.getSoftwarePrefetcher(); + EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel); + EXPECT_STREQ(Prefetcher.getName(), "Little L1 Prefetcher"); + } + + ++lvl; + } + + ++c; + } + + // Check socket-level caches. 
+ const char *const *CacheNames = CPUCacheLevelNames; + const unsigned *CacheSizes = CPUCacheLevelSizes; + const unsigned *CacheLineSizes = CPUCacheLevelLineSizes; + const unsigned *CacheAssociativities = CPUCacheLevelAssociativities; + const unsigned *CacheLatencies = CPUCacheLevelLatencies; + + if (Socket.hasMemoryModel()) { + EXPECT_EQ(&Socket.getMemoryModel().getExecutionResource(), &Socket); + + unsigned lvl = 0; + for (const auto &CacheLevel : Socket.getMemoryModel()) { + EXPECT_EQ(&CacheLevel.getMemoryModel(), &Socket.getMemoryModel()); + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + } + ++s; + } + + // Test the global system representation of the memory model. + const MCSystemModel::CacheLevelList &L1Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1); + const MCSystemModel::CacheLevelList &L2Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2); + const MCSystemModel::CacheLevelList &L3Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3); + + const MCSystemModel::PrefetchConfigList &PrefetchConfigs = + Node.getSoftwarePrefetcherInfo(); + + EXPECT_EQ(L1Levels.size(), 3u); + EXPECT_EQ(L2Levels.size(), 1u); + EXPECT_EQ(L3Levels.size(), 1u); + EXPECT_EQ(PrefetchConfigs.size(), 2u); + + unsigned i = 0; + for (const auto L1Level : L1Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + i == 1 ? LittleCacheLevelNames : + GPUCoreCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + i == 1 ? LittleCacheLevelSizes : + GPUCoreCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + i == 1 ? 
LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes + ); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + i == 1 ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + i == 1 ? LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies + ); + + unsigned Index = (i == 0 ? BigL1 : + i == 1 ? LittleL1 : GPUCoreL1); + + EXPECT_STREQ(L1Level->getName(), CacheNames[Index]); + EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L2Level : L2Levels) { + const char *const *CacheNames = BigCacheLevelNames; + const unsigned *CacheSizes = BigCacheLevelSizes; + const unsigned *CacheLineSizes = BigCacheLevelLineSizes; + const unsigned *CacheAssociativities = BigCacheLevelAssociativities; + const unsigned *CacheLatencies = BigCacheLevelLatencies; + + unsigned Index = BigL2; + + EXPECT_STREQ(L2Level->getName(), CacheNames[Index]); + EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L3Level : L3Levels) { + const char *const *CacheNames = CPUCacheLevelNames; + const unsigned *CacheSizes = CPUCacheLevelSizes; + const unsigned *CacheLineSizes = CPUCacheLevelLineSizes; + const unsigned *CacheAssociativities = CPUCacheLevelAssociativities; + const unsigned *CacheLatencies = CPUCacheLevelLatencies; + + unsigned Index = CPUL3; + + EXPECT_STREQ(L3Level->getName(), CacheNames[Index]); + EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]); + 
EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]); + + ++i; + } +} + +TEST(SystemModel, Topology3Tests) { + // Test this topology: + // + // System + // / \ + // (L1) GPU CPU (L3) + // | / \ + // C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + const unsigned CPUL3 = 0; + + const unsigned GPUL1 = 0; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. + const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCacheLevelNames[] = { "GPUL1" }; + unsigned GPUCacheLevelSizes[] = { 1024*64 }; + unsigned GPUCacheLevelLineSizes[] = { 64 }; + unsigned GPUCacheLevelAssociativities[] = { 24 }; + unsigned GPUCacheLevelLatencies[] = { 12 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. + // The GPU has four cores with two thread team schedulers of vector + // length 64, for a total of 512 "threads." 
+ const char *CPUCoreNames[] = { "BigCore", "LittleCore" }; + unsigned CPUCoreCounts[] = { 2, 8 }; + unsigned CPUThreadCounts[] = { 4, 2 }; + + const char *GPUCoreNames[] = { "GPUCore" }; + unsigned GPUCoreCounts[] = { 4 }; + // Threads in a core. The GPU has two thread team schedulers, each + // team may be a vector length of, say, 64 which we don't model. + unsigned GPUThreadCounts[] = { 2 }; + + // Define socket parameters. + const char *SocketNames[] = { "CPU", "GPU" }; + unsigned CoreTypeCounts[] = { 2, 1 }; + + unsigned ID = 0; + + // Define write-combining buffers. + MCWriteCombiningBufferInfo BigWCBufs(ID++, "BigWCBufs", 8); + MCWriteCombiningBufferInfo LittleWCBufs(ID++, "LittleWCBufs", 4); + + // Define software prefetchers. + MCSoftwarePrefetcherConfig BigPrefetcher(ID++, "Big L1 Prefetcher", true, + true, 1024, 512, 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig LittlePrefetcher(ID++, "Little L1 Prefetcher", + true, true, 1024, 512, 4096, 100, + 4, 32); + + // Define caches. 
+ MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1], + &BigPrefetcher), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1], + &LittlePrefetcher), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // All GPU cores share one level of cache. + MCCacheLevelInfo GPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL1], + GPUCacheLevelSizes[GPUL1], + GPUCacheLevelLineSizes[GPUL1], + GPUCacheLevelAssociativities[GPUL1], + GPUCacheLevelLatencies[GPUL1]), + }; + + // Define memory models. + MCMemoryModel BigMemModel(ID++, + "BigMemModel", + BigCoreCacheLevels, + 2, + &BigWCBufs); + + MCMemoryModel LittleMemModel(ID++, + "LittleMemModel", + LittleCoreCacheLevels, + 1, + &LittleWCBufs); + + MCMemoryModel CPUMemModel(ID++, + "CPUMemModel", + CPUCacheLevels, + 1); + + MCMemoryModel GPUMemModel(ID++, + "GPUMemModel", + GPUCacheLevels, + 1); + + // Define threads. + MCExecutionResource BigThread(ID++, ThreadName, nullptr, 0); + MCExecutionResource LittleThread(ID++, ThreadName, nullptr, 0); + MCExecutionResource GPUThread(ID++, ThreadName, nullptr, 0); + + // Define cores. 
+ MCExecutionResourceDesc BigThreadDesc(ID++, + "BigThreadDesc", + &BigThread, + CPUThreadCounts[Big]); + MCExecutionResourceDesc LittleThreadDesc(ID++, + "LittleThreadDesc", + &LittleThread, + CPUThreadCounts[Little]); + MCExecutionResourceDesc GPUThreadDesc(ID++, + "GPUThreadDesc", + &GPUThread, + GPUThreadCounts[GPU]); + + MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc }; + MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc }; + MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc }; + + MCExecutionResource BigCore(ID++, + CPUCoreNames[Big], + BigThreadsList, + 1, + &BigMemModel); + + MCExecutionResource LittleCore(ID++, + CPUCoreNames[Little], + LittleThreadsList, + 1, + &LittleMemModel); + + MCExecutionResource GPUCore(ID++, + GPUCoreNames[GPU], + GPUThreadsList, + 1); + + // Define sockets. + MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + &CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1, + &GPUMemModel); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. 
  // Two socket types hang directly off the system model.
  EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u);

  // Walk the whole topology top-down (socket -> core -> thread), checking at
  // every step that parent/child back-pointers agree and that names/counts
  // match the tables used to build the model.  's' indexes the socket type
  // and is expected to visit CPUSocket then GPUSocket, mirroring SocketList
  // order above.
  unsigned s = 0;
  for (const auto &SocketDesc : Node) {
    EXPECT_EQ(&SocketDesc.getParentSystemModel(), &Node);
    EXPECT_EQ(SocketDesc.getNumResources(), 1u);

    const auto &Socket = SocketDesc.getResource();
    EXPECT_EQ(&Socket.getResourceDescriptor(), &SocketDesc);
    EXPECT_STREQ(Socket.getName(), SocketNames[s]);
    EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(),
              CoreTypeCounts[s]);

    // Select the expectation tables for whichever socket we're visiting.
    unsigned *CoreCounts = (s == CPUSocket ?
                            CPUCoreCounts : GPUCoreCounts);
    const char *const *CoreNames = (s == CPUSocket ?
                                    CPUCoreNames : GPUCoreNames);
    unsigned *ThreadCounts = (s == CPUSocket ?
                              CPUThreadCounts : GPUThreadCounts);

    unsigned c = 0;
    for (const auto &CoreDesc : Socket) {
      EXPECT_EQ(&CoreDesc.getParentExecutionResource(), &Socket);
      EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]);

      const auto &Core = CoreDesc.getResource();
      EXPECT_EQ(&Core.getResourceDescriptor(), &CoreDesc);
      EXPECT_STREQ(Core.getName(), CoreNames[c]);

      // Every core type contains exactly one thread type.
      EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u);

      const auto &ThreadDesc = Core.getContainedResourceDescriptor(0);
      EXPECT_EQ(&ThreadDesc.getParentExecutionResource(), &Core);
      EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]);

      const auto &Thread = ThreadDesc.getResource();
      EXPECT_EQ(&Thread.getResourceDescriptor(), &ThreadDesc);
      EXPECT_STREQ(Thread.getName(), ThreadName);

      // Threads are leaves.
      EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u);

      // Per-core cache expectation tables.  'c == Big' picks the big-core
      // tables; note this test is only meaningful on the CPU socket -- on the
      // GPU socket the core has no memory model and the block below is
      // skipped by the hasMemoryModel() guard.
      const char *const *CacheNames = (c == Big ?
                                       BigCacheLevelNames :
                                       LittleCacheLevelNames);
      const unsigned *CacheSizes = (c == Big ?
                                    BigCacheLevelSizes :
                                    LittleCacheLevelSizes);
      const unsigned *CacheLineSizes = (c == Big ?
                                        BigCacheLevelLineSizes :
                                        LittleCacheLevelLineSizes);
      const unsigned *CacheAssociativities = (c == Big ?
                                              BigCacheLevelAssociativities :
                                              LittleCacheLevelAssociativities);
      const unsigned *CacheLatencies = (c == Big ?
                                        BigCacheLevelLatencies :
                                        LittleCacheLevelLatencies);

      if (Core.hasMemoryModel()) {
        EXPECT_EQ(&Core.getMemoryModel().getExecutionResource(), &Core);

        // Iterate the core-private cache levels (L1 outward) and compare
        // every attribute against the construction tables.
        unsigned lvl = 0;
        for (const auto &CacheLevel : Core.getMemoryModel()) {
          EXPECT_EQ(&CacheLevel.getMemoryModel(), &Core.getMemoryModel());
          EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]);
          EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]);
          EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]);
          EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]);
          EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]);

          // Only the two L1 levels carry software prefetcher configs; check
          // the back-pointer and name on each.
          if (CacheLevel.getName() == StringRef("BigL1")) {
            const MCSoftwarePrefetcherConfig &Prefetcher =
              CacheLevel.getSoftwarePrefetcher();
            EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel);
            EXPECT_STREQ(Prefetcher.getName(),"Big L1 Prefetcher");
          }
          else if (CacheLevel.getName() == StringRef("LittleL1")) {
            const MCSoftwarePrefetcherConfig &Prefetcher =
              CacheLevel.getSoftwarePrefetcher();
            EXPECT_EQ(&Prefetcher.getTargetCacheLevel(), &CacheLevel);
            EXPECT_STREQ(Prefetcher.getName(), "Little L1 Prefetcher");
          }

          ++lvl;
        }
      }

      ++c;
    }

    // Check socket-level caches.
    const char *const *CacheNames = (s == CPUSocket ?
                                     CPUCacheLevelNames :
                                     GPUCacheLevelNames);
    const unsigned *CacheSizes = (s == CPUSocket ?
                                  CPUCacheLevelSizes :
                                  GPUCacheLevelSizes);
    const unsigned *CacheLineSizes = (s == CPUSocket ?
                                      CPUCacheLevelLineSizes :
                                      GPUCacheLevelLineSizes);
    const unsigned *CacheAssociativities = (s == CPUSocket ?
                                            CPUCacheLevelAssociativities :
                                            GPUCacheLevelAssociativities);
    const unsigned *CacheLatencies = (s == CPUSocket ?
                                      CPUCacheLevelLatencies :
                                      GPUCacheLevelLatencies);

    EXPECT_EQ(&Socket.getMemoryModel().getExecutionResource(), &Socket);

    unsigned lvl = 0;
    for (const auto &CacheLevel : Socket.getMemoryModel()) {
      EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]);
      EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]);
      EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]);
      EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]);
      EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]);

      ++lvl;
    }

    ++s;
  }

  // Test the global system representation of the memory model.  The system
  // model flattens all cache levels reachable anywhere in the topology into
  // per-level lists (all L1s, all L2s, ...), plus a list of every software
  // prefetcher config.
  const MCSystemModel::CacheLevelList &L1Levels =
    Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1);
  const MCSystemModel::CacheLevelList &L2Levels =
    Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2);
  const MCSystemModel::CacheLevelList &L3Levels =
    Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3);

  const MCSystemModel::PrefetchConfigList &PrefetchConfigs =
    Node.getSoftwarePrefetcherInfo();

  // Three L1s (big core, little core, GPU), one L2 (big core), one socket
  // L3 (CPU), and the two L1 prefetchers.
  EXPECT_EQ(L1Levels.size(), 3u);
  EXPECT_EQ(L2Levels.size(), 1u);
  EXPECT_EQ(L3Levels.size(), 1u);
  EXPECT_EQ(PrefetchConfigs.size(), 2u);

  // The L1 list is expected to come back in construction order:
  // big, little, GPU.  The elements are pointers, so copying them in the
  // range-for is cheap.
  unsigned i = 0;
  for (const auto L1Level : L1Levels) {
    const char *const *CacheNames = (i == 0 ? BigCacheLevelNames :
                                     i == 1 ? LittleCacheLevelNames :
                                     GPUCacheLevelNames);
    const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes :
                                  i == 1 ? LittleCacheLevelSizes :
                                  GPUCacheLevelSizes);
    const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes :
                                      i == 1 ? LittleCacheLevelLineSizes :
                                      GPUCacheLevelLineSizes);
    const unsigned *CacheAssociativities = (i == 0 ?
                                            BigCacheLevelAssociativities :
                                            i == 1 ?
                                            LittleCacheLevelAssociativities :
                                            GPUCacheLevelAssociativities);
    const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies :
                                      i == 1 ? LittleCacheLevelLatencies :
                                      GPUCacheLevelLatencies);

    unsigned Index = (i == 0 ? BigL1 :
                      i == 1 ? LittleL1 : GPUL1);

    EXPECT_STREQ(L1Level->getName(), CacheNames[Index]);
    EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]);
    EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]);
    EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]);
    EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]);

    ++i;
  }

  // Only the big core contributes an L2.
  i = 0;
  for (const auto L2Level : L2Levels) {
    const char *const *CacheNames = BigCacheLevelNames;
    const unsigned *CacheSizes = BigCacheLevelSizes;
    const unsigned *CacheLineSizes = BigCacheLevelLineSizes;
    const unsigned *CacheAssociativities = BigCacheLevelAssociativities;
    const unsigned *CacheLatencies = BigCacheLevelLatencies;

    unsigned Index = BigL2;

    EXPECT_STREQ(L2Level->getName(), CacheNames[Index]);
    EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]);
    EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]);
    EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]);
    EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]);

    ++i;
  }

  // Only the CPU socket contributes an L3.
  i = 0;
  for (const auto L3Level : L3Levels) {
    const char *const *CacheNames = CPUCacheLevelNames;
    const unsigned *CacheSizes = CPUCacheLevelSizes;
    const unsigned *CacheLineSizes = CPUCacheLevelLineSizes;
    const unsigned *CacheAssociativities = CPUCacheLevelAssociativities;
    const unsigned *CacheLatencies = CPUCacheLevelLatencies;

    unsigned Index = CPUL3;

    EXPECT_STREQ(L3Level->getName(), CacheNames[Index]);
    EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]);
    EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]);
    EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]);
    EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]);

    ++i;
  }
}

} // end namespace