diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -732,6 +732,15 @@ /// \return The associativity of the cache level, if available. llvm::Optional getCacheAssociativity(CacheLevel Level) const; + /// \return Whether to prefetch loads. + bool prefetchReads() const; + + /// \return Whether to prefetch stores. + bool prefetchWrites() const; + + /// \return Whether to use read prefetches for stores. + bool useReadPrefetchForWrites() const; + /// \return How much before a load we should place the prefetch instruction. /// This is currently measured in number of instructions. unsigned getPrefetchDistance() const; @@ -1124,12 +1133,15 @@ virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; - virtual unsigned getCacheLineSize() = 0; - virtual llvm::Optional getCacheSize(CacheLevel Level) = 0; - virtual llvm::Optional getCacheAssociativity(CacheLevel Level) = 0; - virtual unsigned getPrefetchDistance() = 0; - virtual unsigned getMinPrefetchStride() = 0; - virtual unsigned getMaxPrefetchIterationsAhead() = 0; + virtual unsigned getCacheLineSize() const = 0; + virtual llvm::Optional getCacheSize(CacheLevel Level) const = 0; + virtual llvm::Optional getCacheAssociativity(CacheLevel Level) const = 0; + virtual bool prefetchReads() const = 0; + virtual bool prefetchWrites() const = 0; + virtual bool useReadPrefetchForWrites() const = 0; + virtual unsigned getPrefetchDistance() const = 0; + virtual unsigned getMinPrefetchStride() const = 0; + virtual unsigned getMaxPrefetchIterationsAhead() const = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -1440,20 +1452,25 @@ return 
Impl.shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } - unsigned getCacheLineSize() override { + unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } - llvm::Optional getCacheSize(CacheLevel Level) override { + llvm::Optional getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); } - llvm::Optional getCacheAssociativity(CacheLevel Level) override { + llvm::Optional getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); } - unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } - unsigned getMinPrefetchStride() override { + bool prefetchReads() const override { return Impl.prefetchReads(); } + bool prefetchWrites() const override { return Impl.prefetchWrites(); } + bool useReadPrefetchForWrites() const override { + return Impl.useReadPrefetchForWrites(); + } + unsigned getPrefetchDistance() const override { return Impl.getPrefetchDistance(); } + unsigned getMinPrefetchStride() const override { return Impl.getMinPrefetchStride(); } - unsigned getMaxPrefetchIterationsAhead() override { + unsigned getMaxPrefetchIterationsAhead() const override { return Impl.getMaxPrefetchIterationsAhead(); } unsigned getMaxInterleaveFactor(unsigned VF) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -369,9 +369,9 @@ return false; } - unsigned getCacheLineSize() { return 0; } + unsigned getCacheLineSize() const { return 0; } - llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) { + llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; @@ -383,7 +383,7 @@ } llvm::Optional getCacheAssociativity( - TargetTransformInfo::CacheLevel 
Level) { + TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; @@ -394,11 +394,17 @@ llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } - unsigned getPrefetchDistance() { return 0; } + bool prefetchReads() const { return false; }; - unsigned getMinPrefetchStride() { return 1; } + bool prefetchWrites() const { return false; } - unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + bool useReadPrefetchForWrites() const { return false; } + + unsigned getPrefetchDistance() const { return 0; } + + unsigned getMinPrefetchStride() const { return 1; } + + unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -492,6 +492,46 @@ return BaseT::getInstructionLatency(I); } + virtual Optional + getCacheSize(TargetTransformInfo::CacheLevel Level) const { + return Optional( + getST()->getCacheSize(static_cast(Level))); + } + + virtual Optional + getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { + return Optional( + getST()->getCacheAssociativity(static_cast(Level))); + } + + virtual unsigned getCacheLineSize() const { + return getST()->getCacheLineSize(); + } + + virtual bool prefetchReads() const { + return getST()->prefetchReads(); + } + + virtual bool prefetchWrites() const { + return getST()->prefetchWrites(); + } + + virtual bool useReadPrefetchForWrites() const { + return getST()->useReadPrefetchForWrites(); + } + + virtual unsigned getPrefetchDistance() const { + return getST()->getPrefetchDistance(); + } + + virtual unsigned getMinPrefetchStride() const { + return getST()->getMinPrefetchStride(); + } + + virtual unsigned getMaxPrefetchIterationsAhead() const { + return 
getST()->getMaxPrefetchIterationsAhead(); + } + /// @} /// \name Vector TTI Implementations diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -44,6 +44,7 @@ struct SubtargetFeatureKV; struct SubtargetInfoKV; class SUnit; +class TargetSystemModel; class TargetFrameLowering; class TargetInstrInfo; class TargetLowering; @@ -67,7 +68,8 @@ const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, - const unsigned *OC, const unsigned *FP); + const unsigned *OC, const unsigned *FP, + const SubtargetInfoKV *SystemModels); public: // AntiDepBreakMode - Type of anti-dependence breaking that should diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCSystemModel.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/MC/SubtargetFeature.h" @@ -50,6 +51,90 @@ const unsigned *ForwardingPaths; FeatureBitset FeatureBits; // Feature bits for current CPU + FS + // System models + const SubtargetInfoKV *SystemModels; + const MCSystemModel *CPUModel; + + /// If caches at a particular level are of different sizes, ask the + /// target what to do. By default, return zero. + /// + virtual Optional + resolveCacheSize(unsigned Level, + const MCSystemModel::CacheLevelSet &Levels) const { + return Optional(0); + } + + /// If caches at a particular level are of different + /// associativities, ask the target what to do. By default, return + /// one. 
+ /// + virtual Optional + resolveCacheAssociativity(unsigned Level, + const MCSystemModel::CacheLevelSet &Levels) const { + return Optional(1); + } + + /// If cache lines at a particular level are of different sizes, ask + /// the target what to do. By default, return zero. + /// + virtual Optional + resolveCacheLineSize(unsigned Level, + const MCSystemModel::CacheLevelSet &Levels) const { + return Optional(0); + } + + /// If prefetcher configs report differences on whether they are + /// enabled for reads, ask the target what to do. By default, + /// return the first enable/disable setting we find. + /// + virtual bool resolvePrefetchReads( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->isEnabledForReads(); + } + + /// If prefetcher configs report differences on whether they are + /// enabled for writes, ask the target what to do. By default, + /// return the first enable/disable setting we find. + /// + virtual bool resolvePrefetchWrites( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->isEnabledForWrites(); + } + + /// If prefetcher configs report differences on whether to use read + /// prefetches for stores, ask the target what to do. By default, + /// return the first setting we find. + /// + virtual bool resolveUseReadPrefetchForWrites( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->useReadPrefetchForWrites(); + } + + /// If prefetcher configs report different distances, ask the target + /// what to do. By default, return the first distance we find. + /// + virtual unsigned resolvePrefetchDistanceInInstructions( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->getDistanceInInstructions(); + } + + /// If prefetcher configs report different max distances, ask the + /// target what to do. By default, return the first distance we + /// find. 
+ /// + virtual unsigned resolveMaxPrefetchIterationsAhead( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->getMaxDistanceInIterations(); + } + + /// If prefetcher configs report different min strides, ask the + /// target what to do. By default, return the first stride we find. + /// + virtual unsigned resolveMinPrefetchStride( + const MCSystemModel::PrefetchConfigSet &Prefetchers) const { + return (*Prefetchers.begin())->getMinByteStride(); + } + public: MCSubtargetInfo(const MCSubtargetInfo &) = default; MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS, @@ -58,7 +143,8 @@ const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, - const unsigned *OC, const unsigned *FP); + const unsigned *OC, const unsigned *FP, + const SubtargetInfoKV *SystemModelTable); MCSubtargetInfo() = delete; MCSubtargetInfo &operator=(const MCSubtargetInfo &) = delete; MCSubtargetInfo &operator=(MCSubtargetInfo &&) = delete; @@ -180,6 +266,87 @@ auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + + /// Get the system model of a CPU. + const MCSystemModel &getSystemModelForCPU(StringRef CPU) const; + + /// Get the system model for this subtarget's CPU. + const MCSystemModel &getSystemModel() const { return *CPUModel; } + + /// Return the cache size in bytes for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache. If the size at the level is ambiguous (for example, + /// there are two different types of cores with different L1 sizes), + /// ask the target what to do via resolveCacheSize. + /// + virtual Optional getCacheSize(unsigned Level) const; + + /// Return the cache associativity for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache. 
If the associativity at the level is ambiguous (for + /// example, there are two different types of cores with different + /// L1 associativities), ask the target what to do via + /// resolveCacheAssociativity. + /// + virtual Optional getCacheAssociativity(unsigned Level) const; + + /// Return the target cache line size in bytes at a given level. If + /// there are multiple such caches with different sizes, ask the + /// target what to do via resolveCacheLineSize. + /// + virtual Optional getCacheLineSize(unsigned Level) const; + + /// Return the target cache line size in bytes. By default, return + /// the line size for the bottom-most level of cache. This provides + /// a more convenient interface for the common case where all cache + /// levels have the same line size. Return zero if there is no + /// cache model. + /// + virtual unsigned getCacheLineSize() const { + Optional Size = getCacheLineSize(0); + if (Size) + return *Size; + + return 0; + } + + /// Return whether we should do software prefetching for loads on + /// this target. + /// + virtual bool prefetchReads() const; + + /// Return whether we should do software prefetching for stores on + /// this target. + /// + virtual bool prefetchWrites() const; + + /// Return whether to use read prefetches for stores on this target. + /// + virtual bool useReadPrefetchForWrites() const; + + /// Return the preferred prefetch distance in terms of instructions. + /// Return the prefetch config for the topmost memory model that has + /// a prefetcher. If there are multiple such models with different + /// prefetching configs, return 0. The target will have to override + /// this to do the right thing. + /// + virtual unsigned getPrefetchDistance() const; + + /// Return the maximum prefetch distance in terms of loop + /// iterations. Return the prefetch config for the topmost memory + /// model that has a prefetcher. If there are multiple such models + /// with different prefetching configs, return 0. 
The target will + /// have to override this to do the right thing. + /// + virtual unsigned getMaxPrefetchIterationsAhead() const; + + /// Return the minimum stride necessary to trigger software + /// prefetching. Return the prefetch config for the topmost memory + /// model that has a prefetcher. If there are multiple such models + /// with different prefetching configs, return 0. The target will + /// have to override this to do the right thing. + /// + virtual unsigned getMinPrefetchStride() const; }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCSystemModel.h b/llvm/include/llvm/MC/MCSystemModel.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MC/MCSystemModel.h @@ -0,0 +1,566 @@ +//=== MC/MCSystemModel.h - Target System Model --------------*- C++ -*-=======// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes an abstract interface used to get information +// about a target machine's execution engine, including core +// specifications, memory models and other things related to execution +// resources. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSYSTEMMODEL_H +#define LLVM_MC_MCSYSTEMMODEL_H + +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/SmallVector.h" + +#include + +namespace llvm { + +/// Provide information about write-combining buffers. These are +/// typically used by hardware to buffer non-temporal stores for +/// efficient data streaming. Each buffer expects a more-or-less +/// linear stream of writes. 
A write outside the current cache line +/// being filled causes the buffer to flush, so software should not +/// oversubscribe the available hardware resources. If it does, in +/// the worst case buffers will thrash and flush after each write, as +/// each address sent will map to a cache line outside those currently +/// being filled. For example, assuming hardware has two buffers, +/// streaming arrays A and B in the following loop is fine, as writes +/// to A will map to, say, buffer 0 and writes to B will map to buffer +/// 1. If C were also streamed, in the worst case its writes would +/// ping-pong between buffers 0 and 1, flushing one or the other after +/// each write to C, degrading the streaming of A and B. +/// +/// do { +/// A[i] = ... +/// B[i] = ... +/// C[i] = ... +/// } while(i < Something); +/// +class MCWriteCombiningBufferInfo { +private: + unsigned ID; + const char *Name; + const int NumBuffers; // The number of write-combining buffers + +public: + MCWriteCombiningBufferInfo(unsigned I, const char *TheName, int NumBufs) + : ID(I), Name(TheName), NumBuffers(NumBufs) {} + + virtual ~MCWriteCombiningBufferInfo(); + + /// Return the buffer ID number. + /// + unsigned getID() const { return ID; } + + /// Return the buffer name for debugging. + /// + const char *getName() const { return Name; } + + /// Return the number of available write-combining buffers. + /// + int getNumBuffers() const { return NumBuffers; } +}; + +/// MCSoftwarePrefetcherConfig - Provide information about how to +/// configure the software prefetcher. 
+/// +class MCSoftwarePrefetcherConfig { +private: + unsigned ID; + const char *Name; + const bool EnabledForReads; + const bool EnabledForWrites; + const bool UseReadPrefetchForWrites; + const unsigned BytesAhead; + const unsigned MinBytesAhead; + const unsigned MaxBytesAhead; + const unsigned InstructionsAhead; + const unsigned MaxIterationsAhead; + const unsigned MinByteStride; + +public: + MCSoftwarePrefetcherConfig(unsigned I, + const char *TheName, + bool EnableForReads, + bool EnableForWrites, + bool ReadPrefetchForWrites, + unsigned NumBytesAhead, + unsigned MinNumBytesAhead, + unsigned MaxNumBytesAhead, + unsigned NumInstructionsAhead, + unsigned MaxNumIterationsAhead, + unsigned MinStride) + : ID(I), + Name(TheName), + EnabledForReads(EnableForReads), + EnabledForWrites(EnableForWrites), + UseReadPrefetchForWrites(ReadPrefetchForWrites), + BytesAhead(NumBytesAhead), + MinBytesAhead(MinNumBytesAhead), + MaxBytesAhead(MaxNumBytesAhead), + InstructionsAhead(NumInstructionsAhead), + MaxIterationsAhead(MaxNumIterationsAhead), + MinByteStride(MinStride) {} + + virtual ~MCSoftwarePrefetcherConfig(); + + /// Return the prefetch config ID number. + /// + unsigned getID() const { return ID; } + + /// Return the prefetch config name for debugging. + /// + const char *getName() const { return Name; } + + /// Return whether we should do software prefetching for loads. + /// + bool isEnabledForReads() const { return EnabledForReads; } + + /// Return whether we should do software prefetching for stores. + /// + bool isEnabledForWrites() const { return EnabledForWrites; } + + /// Return whether we should use read prefetches for stores. + /// + bool useReadPrefetchForWrites() const { return UseReadPrefetchForWrites; } + + /// Return the preferred prefetch distance in bytes. A value of 0 + /// tells the software prefetcher to determine distance using + /// heuristics. 
+ /// + unsigned getDistanceInBytes() const { return BytesAhead; } + + /// Never prefetch less than this number of bytes ahead. + /// + unsigned getMinDistanceInBytes() const { return MinBytesAhead; } + + /// Never prefetch more than this number of bytes ahead. + /// + unsigned getMaxDistanceInBytes() const { return MaxBytesAhead; } + + /// Return the preferred prefetch distance in terms of number of + /// instructions. + /// + unsigned getDistanceInInstructions() const { return InstructionsAhead; } + + /// Never prefetch more than this number of loop iterations ahead. + /// + unsigned getMaxDistanceInIterations() const { return MaxIterationsAhead; } + + /// Prefetch only if the byte stride is at least this large. + /// + unsigned getMinByteStride() const { return MinByteStride; } +}; + +/// Provide information about a specific level in the cache (size, +/// associativity, etc.). +/// +class MCCacheLevelInfo { +private: + unsigned ID; + const char *Name; + const unsigned Size; // Size of cache in bytes + const unsigned LineSize; // Size of cache line in bytes + const unsigned Ways; // Number of ways + const unsigned Latency; // Number of cycles to load + +public: + MCCacheLevelInfo(unsigned I, + const char *TheName, + uint64_t TotalSize, + unsigned TheLineSize, + unsigned NumWays, + unsigned TheLatency) + : ID(I), + Name(TheName), + Size(TotalSize), + LineSize(TheLineSize), + Ways(NumWays), + Latency(TheLatency) {} + + virtual ~MCCacheLevelInfo(); + + /// Return the cache level ID number. + /// + unsigned getID() const { return ID; } + + /// Return the cache level name for debugging. + /// + const char *getName() const { return Name; } + + /// Return the total size of the cache level in bytes. + /// + uint64_t getSizeInBytes() const { return Size; } + + /// Return the size of the cache line in bytes. + /// + unsigned getLineSizeInBytes() const { return LineSize; } + + /// Return the number of ways. 
+ /// + unsigned getAssociativity() const { return Ways; } + + /// Return the latency of a load in clocks. + /// + unsigned getLatency() const { return Latency; } +}; + + +/// Aggregate some number of cache levels together along with a +/// software prefetching configuration and write-combining buffer +/// information into a model of the memory system as viewed from a +/// particular execution resource. For example, a core may have L1 and +/// L2 caches private to it, while a socket may have an L3 shared by +/// all cores contained by the socket. The core memory model will +/// list L1 and L2 and the socket memory model will list L3. +/// +class MCMemoryModel { +public: + typedef const MCCacheLevelInfo *cachelevel_iterator; + +private: + unsigned IDNum; + const char *Name; + + const MCCacheLevelInfo *Levels; // Array of cache levels + unsigned NumLevels; // Number of cache levels + // Write-combining buffer information + const MCWriteCombiningBufferInfo &WCBuffers; + // Software prefetching config + const MCSoftwarePrefetcherConfig &SoftwarePrefetcher; + +public: + MCMemoryModel(unsigned I, + const char *TheName, + const MCCacheLevelInfo *CacheLevels, + unsigned NumCacheLevels, + const MCWriteCombiningBufferInfo &WCBufs, + const MCSoftwarePrefetcherConfig &PrefetcherConfig) + : IDNum(I), + Name(TheName), + Levels(CacheLevels), + NumLevels(NumCacheLevels), + WCBuffers(WCBufs), + SoftwarePrefetcher(PrefetcherConfig) {} + + virtual ~MCMemoryModel(); + + /// Return the memory model ID number. + /// + unsigned getID() const { return IDNum; } + + /// Return the memory model name for debugging. + /// + const char *getName() const { return Name; } + + //===--------------------------------------------------------------------===// + // Cache Level Information + // + + /// Index the hierarchy for a cache level. 
Note that this is a + /// piece of the global cache hierarchy private to the execution + /// resource using the memory model, and shared by any contained + /// execution resources. As such "level 0" (or level 1, etc.) has + /// no correspondence to a global-view cache level. Thus names like + /// "L1" aren't very useful. + /// + const MCCacheLevelInfo &getCacheLevel(unsigned Level) const { + assert(Level < NumLevels && + "Attempting to access record for invalid cache level!"); + return Levels[Level]; + } + + /// Return the number of cache levels. + /// + unsigned getNumCacheLevels() const { + return NumLevels; + } + + /// Cache level iterators + /// + cachelevel_iterator begin() const { return Levels; } + cachelevel_iterator end() const { + return Levels + getNumCacheLevels(); + } + + //===--------------------------------------------------------------------===// + // Write Combining Buffer Information + // + + /// Return the write combining buffer info. + /// + const MCWriteCombiningBufferInfo &getWCBufferInfo() const { + return WCBuffers; + } + + //===--------------------------------------------------------------------===// + // Software Prefetcher Configuration + // + + /// Return the software prefetcher configuration. + /// + const MCSoftwarePrefetcherConfig &getSoftwarePrefetcherConfig() const { + return SoftwarePrefetcher; + } +}; + +class MCExecutionResource; + +/// Provide information about the number of execution resources of a +/// given type are contained within an execution resource. For example +/// at the socket level there may be a core resource descriptor specifying +/// that the socket has 48 cores. 
+/// +class MCExecutionResourceDesc { + unsigned ID; + const char *Name; + const MCExecutionResource *Resource; // The described resource + unsigned NumResources; // The resource count + +public: + MCExecutionResourceDesc(unsigned I, + const char *TheName, + const MCExecutionResource *R, + unsigned N) + : ID(I), Name(TheName), Resource(R), NumResources(N) {} + + /// Return the resource descriptor ID number. + /// + unsigned getID() const { return ID; } + + /// Return the resource descriptor name for debugging. + /// + const char *getName() const { return Name; } + + /// Get the resource. + /// + const MCExecutionResource &getResource() const { + return *Resource; + } + + /// Get the number of resources represented by this descriptor. + /// + unsigned getNumResources() const { + return NumResources; + } +}; + +/// Provide information about a specific kind of execution resource +/// (core, thread, etc.). +class MCExecutionResource { + unsigned ID; + const char *Name; + + /// An array of execution resource descriptors, allowing an execution + /// resource to contain a variety of resources; for example a socket + /// containing some number of big cores and some number of little + /// cores + const MCExecutionResourceDesc *const *Contained; + + /// The number of unique contained execution resource types + unsigned NumContained; + + /// The memory model for this execution resource + const MCMemoryModel &MemoryModel; + +public: + + using resource_iterator = + pointee_iterator; + + MCExecutionResource(unsigned I, + const char *TheName, + const MCExecutionResourceDesc *const *C, + unsigned NC, + const MCMemoryModel &M) + : ID(I), Name(TheName), Contained(C), NumContained(NC), MemoryModel(M) {} + + virtual ~MCExecutionResource(); // Allow subclasses + + /// Return the resource ID number. + /// + unsigned getID() const { return ID; } + + /// Return the resource name for debugging. 
+ /// + const char *getName() const { return Name; } + + /// Return the memory model for this resource. + /// + const MCMemoryModel &getMemoryModel() const { + return MemoryModel; + } + + /// Return the number of unique execution resource types contained + /// within this one. + /// + /// + unsigned getNumContainedExecutionResourceTypes() const { + return NumContained; + } + + /// Iterate over unique contained resources. + /// + resource_iterator begin() const { + return resource_iterator(Contained); + } + resource_iterator end() const { + return resource_iterator(Contained + NumContained); + } + + /// Get the resource descriptor indexed by the given value. + /// + const MCExecutionResourceDesc &getResourceDescriptor(unsigned Index) const { + assert(Index < getNumContainedExecutionResourceTypes() && + "Overindexing resource descriptors!"); + return *Contained[Index]; + } +}; + +/// Model a collection of execution resources coupled with other +/// information. This also aggregates information about the resource +/// memory models, presenting a global system view of memory +/// characteristics. +/// +class MCSystemModel { + unsigned ID; + const char *Name; + + /// An array of execution resource descriptor pointers + const MCExecutionResourceDesc *const *Resources; + unsigned NumResources; /// Number of entries in the array + + static const MCSystemModel Default; + +public: + // Caches of information about execution resources and their memory + // models. + + // Make the cache topology indexable by level. The bottom-most + // cache level of each resource makes up level zero. For example: + // + // Thread + // | + // Big Core Little Core + // \/ + // Socket + // + // If the big core has an L1 and L2 cache, the little core has an L1 + // cache and the socket has an L3 cache, the big and little L1s go + // into the L1 set, the big core L2 goes into the L2 set and the + // socket L3 goes into the L3 set. 
+ // + + using CacheLevelSet = SmallVector; + +private: + using CacheLevelInfo = SmallVector; + mutable CacheLevelInfo CacheLevels; + + /// Cache information about caches on an as-needed basis. + /// + void initCacheInfoCache() const; + + // Gather information about all software prefetch configs. + // +public: + using PrefetchConfigSet = SmallVector; + +private: + mutable PrefetchConfigSet Prefetchers; + + /// Cache information about prefetchers on an as-needed basis. + /// + void initPrefetchConfigCache() const; + +public: + /// Convenience values for indexing the global-view cache hierarchy. + /// + enum CacheLevel { + L1 = 0, + L2, + L3, + L4 + }; + + using resource_iterator = + pointee_iterator; + + MCSystemModel(unsigned I, + const char *TheName, + const MCExecutionResourceDesc *const *R, + unsigned NR) + : ID(I), Name(TheName), Resources(R), NumResources(NR) { + initCacheInfoCache(); + initPrefetchConfigCache(); + } + + virtual ~MCSystemModel(); + + /// Return the default initialized model. + /// + static const MCSystemModel &getDefaultSystemModel() { + return Default; + } + + /// Return the execution engine ID number. + /// + unsigned getID() const { return ID; } + + /// Return the execution engine name for debugging. + /// + const char *getName() const { return Name; } + + /// Return the number of unique execution resource types. + /// + unsigned getNumExecutionResourceTypes() const { + return NumResources; + } + + /// Iterate over top-level execution resources. + /// + resource_iterator begin() const { + return resource_iterator(Resources); + } + resource_iterator end() const { + return resource_iterator(Resources + NumResources); + } + + /// Get the resource descriptor indexed by the given value. 
+ /// + const MCExecutionResourceDesc &getResourceDescriptor(unsigned Index) const { + assert(Index < getNumExecutionResourceTypes() && + "Overindexing resource descriptors!"); + return *Resources[Index]; + } + + /// Retrieve cached information about cache levels. + /// + const CacheLevelSet *getCacheLevelInfo(unsigned Level) const { + if (Level >= CacheLevels.size()) { + return nullptr; + } + + return &CacheLevels[Level]; + } + + /// Retrieve cached information about prefetchers. + /// + const PrefetchConfigSet &getSoftwarePrefetcherInfo() const { + return Prefetchers; + } +}; + +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1458,6 +1458,11 @@ } //===----------------------------------------------------------------------===// +// Pull in the common support for execution engine generation. +// +include "llvm/Target/TargetSystemModel.td" + +//===----------------------------------------------------------------------===// // Processor chip sets - These values represent each of the chip sets supported // by the scheduler. Each Processor definition requires corresponding // instruction itineraries. @@ -1472,6 +1477,10 @@ // SchedMachineModel SchedModel = NoSchedModel; + // System - A system model describing execution resources and + // the machine memory model. + SystemModel System = MinimalSystemModel; + // ProcItin - The scheduling information for the target processor. 
// ProcessorItineraries ProcItin = pi; diff --git a/llvm/include/llvm/Target/TargetCacheModel.td b/llvm/include/llvm/Target/TargetCacheModel.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/TargetCacheModel.td @@ -0,0 +1,58 @@ +//===- TargetCacheModel.td - Target cache information----------*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// This represents a specific level within the cache hierarchy. +//===----------------------------------------------------------------------===// +class Size : Int; +class Ways : Int; +class LineSize : Int; +class Latency : Int; + +class CacheLevel { + int Size = size.Value; + int LineSize = linesize.Value; + int Ways = ways.Value; + int Latency = latency.Value; +} + +def NoCacheLevel : CacheLevel, LineSize<0>, Ways<0>, Latency<0>>; + +//===----------------------------------------------------------------------===// +// This models a specific cache hierarchy. Levels should be given in order +// from lowest to highest (e.g. L1, then L2...). +//===----------------------------------------------------------------------===// +class CacheHierarchy levels> { + list Levels = levels; +} + +def NoCaches : CacheHierarchy<[]>; + +//===----------------------------------------------------------------------===// +// Provide some common cache sizes. 
+//===----------------------------------------------------------------------===// +def _1KiB : Int<1024>; +def _16KiB : Int; +def _32KiB : Int; +def _64KiB : Int; +def _128KiB : Int; +def _256KiB : Int; +def _512KiB : Int; +def _1MiB : Int; +def _2MiB : Int; +def _4MiB : Int; +def _6MiB : Int; +def _8MiB : Int; +def _12MiB : Int; +def _16MiB : Int; +def _20MiB : Int; +def _25MiB : Int; +def _32MiB : Int; +def _40MiB : Int; diff --git a/llvm/include/llvm/Target/TargetMemoryModel.td b/llvm/include/llvm/Target/TargetMemoryModel.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/TargetMemoryModel.td @@ -0,0 +1,42 @@ +//===- TargetMemoryModel.td - Target memory system information-*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the target-independent memory hierarchy interfaces which +// should be implemented by each target that makes use of such information. +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/TargetCacheModel.td" +include "llvm/Target/TargetWCBufferModel.td" +include "llvm/Target/TargetSoftwarePrefetchConfig.td" + +//===----------------------------------------------------------------------===// +// MemoryModel - This models a memory subsystem. +//===----------------------------------------------------------------------===// +class MemoryModel { + CacheHierarchy Caches = c; + WriteCombiningBuffer WCBuffers = w; + SoftwarePrefetcher Prefetcher = p; +} + +// For execution resources that really don't have a memory model. 
+// +def NoMemoryModel : MemoryModel; + +//===----------------------------------------------------------------------===// +// Define the minimal memory model needed to implement legacy TTI interfaces. +//===----------------------------------------------------------------------===// +def MinimalMemoryModel : MemoryModel; diff --git a/llvm/include/llvm/Target/TargetSoftwarePrefetchConfig.td b/llvm/include/llvm/Target/TargetSoftwarePrefetchConfig.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/TargetSoftwarePrefetchConfig.td @@ -0,0 +1,99 @@ +//===- TargetSoftwarePrefetchConfig.td - Target prefetch info--*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SoftwarePrefetcher - This provides parameters to software prefetching. +//===----------------------------------------------------------------------===// +class IsReadEnabled : Int; +class IsWriteEnabled : Int; +class ReadPrefetchForWrites : Int; +class ByteDistance : Int; +class MinByteDistance : Int; +class MaxByteDistance : Int; +class InstructionDistance : Int; +class MaxIterationDistance : Int; +class MinByteStride : Int; + +class SoftwarePrefetcher { + int EnabledForReads = re.Value; // Do prefetching for loads. + int EnabledForWrites = we.Value; // Do prefetching for stores. + int UseReadPFForWrites = rfw.Value; // Use a read prefetch for stores. + int BytesAhead = bd.Value; // Average "good" prefetch distance. Set + // to zero to tell the prefetcher to use + // heuristics to determine an appropriate + // distance. + int MinBytesAhead = bnd.Value; // Don't prefetch anything less than this + // far ahead. 
+ int MaxBytesAhead = bxd.Value; // Don't prefetch anything more than this + // far ahead. + int InstructionsAhead = id.Value; // Prefetch this many instructions ahead, + // used by prefetchers that operate in + // terms of instruction distance rather + // than bytes. + int MaxIterationsAhead = ixd.Value; // Don't prefetch more than this number + // of iterations ahead. Used by + // prefetchers that operate in terms of + // instruction distance rather than + // bytes. + int MinStride = ns.Value; // Don't prefetch unless stride is at + // least this large. +} + + +def ReadEnabled : IsReadEnabled<1>; +def ReadDisabled : IsReadEnabled<0>; + +def WriteEnabled : IsWriteEnabled<1>; +def WriteDisabled : IsWriteEnabled<0>; + +def UseReadPrefetchForWrites : ReadPrefetchForWrites<1>; +def UseWritePrefetchForWrites : ReadPrefetchForWrites<0>; + +def HeuristicByteDistance : ByteDistance<0>; +def HeuristicInstructionDistance : InstructionDistance<0>; + +def NoSoftwarePrefetcher : SoftwarePrefetcher, + MaxByteDistance<0>, + HeuristicInstructionDistance, + MaxIterationDistance<0>, + MinByteStride<0>>; + +// This is for targets that define some aspects of prefetching in +// target-specific TTI. Until such targets are ported to the system +// model, they should continue to work as they do now. Once the porting +// is complete, TransitionSoftwarePrefetcher use should migrate to +// NoSoftwarePrefetcher or to the appropriate target-defined software +// prefetch configuration. +// +def TransitionSoftwarePrefetcher : + SoftwarePrefetcher, + MaxByteDistance<0>, + HeuristicInstructionDistance, + // Legacy TTI used UINT_MAX, which isn't available in + // TableGen. Approximate it with some thing that will + // work on 32-bit hosts. 
+ MaxIterationDistance<4294967295>, + MinByteStride<1>>; diff --git a/llvm/include/llvm/Target/TargetSystemModel.td b/llvm/include/llvm/Target/TargetSystemModel.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/TargetSystemModel.td @@ -0,0 +1,73 @@ +//===- TargetSystemModel.td - Target Hardware Info --------*- tablegen -*-====// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the target-independent execution hardware available. It +// should be implemented by each target that makes use of such information. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// A convenience wrapper to hold an integer value for later consumption. +// This is used to give names to common integer-y concepts like sizes +// and flags. +//===----------------------------------------------------------------------===// +class Int { + int Value = value; +} + +include "llvm/Target/TargetMemoryModel.td" + +class ExecutionResource; + +class ExecutionResourceDesc { + ExecutionResource Resource = resource; + int NumResources = numresources; +} + +//===----------------------------------------------------------------------===// +// This models a particular group of execution resources (threads, cores, +// etc.), describing the relationship among them and their cache sharing +// characteristics. 
+//===----------------------------------------------------------------------===// +class ExecutionResource contained, + MemoryModel memmodel> { + list Contained = contained; + MemoryModel MemModel = memmodel; +} + +//===----------------------------------------------------------------------===// +// Define some common execution resources, mainly for readability. +//===----------------------------------------------------------------------===// +class Thread : ExecutionResource<[], NoMemoryModel>; +class Core contained, + MemoryModel memmodel> : + ExecutionResource; +class Socket contained, + MemoryModel memmodel> : + ExecutionResource; + +//===----------------------------------------------------------------------===// +// This models a collection of execution resources as well as the machine +// memory model. +//===----------------------------------------------------------------------===// +class SystemModel resources> { + list Resources = resources; +} + +//===----------------------------------------------------------------------===// +// Define the minimal execution engine needed to implement legacy TTI +// interfaces. +//===----------------------------------------------------------------------===// +def MinimalCore : Core<[], MinimalMemoryModel>; + +def MinimalCoreResourceDesc : ExecutionResourceDesc; + +def MinimalSystemModel : SystemModel<[MinimalCoreResourceDesc]>; diff --git a/llvm/include/llvm/Target/TargetWCBufferModel.td b/llvm/include/llvm/Target/TargetWCBufferModel.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Target/TargetWCBufferModel.td @@ -0,0 +1,22 @@ +//===- TargetWCBufferModel.td - Target buffer information------*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WriteCombiningBuffer - This models hardware buffers that combine +// writes into a single transaction. Typically +// these are used for non-temporal store operations. +// +// NumBuffers - The number of effective buffers available. +//===----------------------------------------------------------------------===// +class WriteCombiningBuffer { + int NumBuffers = n; +} + +def NoWCBuffers : WriteCombiningBuffer<0>; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -375,6 +375,18 @@ return TTIImpl->getCacheAssociativity(Level); } +bool TargetTransformInfo::prefetchReads() const { + return TTIImpl->prefetchReads(); +} + +bool TargetTransformInfo::prefetchWrites() const { + return TTIImpl->prefetchWrites(); +} + +bool TargetTransformInfo::useReadPrefetchForWrites() const { + return TTIImpl->useReadPrefetchForWrites(); +} + unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -19,8 +19,10 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) - : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { + const InstrStage *IS, const unsigned *OC, const unsigned *FP, + const SubtargetInfoKV *SystemModels) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP, + 
SystemModels) { } TargetSubtargetInfo::~TargetSubtargetInfo() = default; diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -42,6 +42,7 @@ MCSubtargetInfo.cpp MCSymbol.cpp MCSymbolELF.cpp + MCSystemModel.cpp MCTargetOptions.cpp MCValue.cpp MCWasmObjectTargetWriter.cpp diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp --- a/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/llvm/lib/MC/MCSubtargetInfo.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCSystemModel.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/MC/SubtargetFeature.h" @@ -28,10 +29,14 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); - if (!CPU.empty()) + if (!CPU.empty()) { CPUSchedModel = &getSchedModelForCPU(CPU); - else + CPUModel = &getSystemModelForCPU(CPU); + } + else { CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); + CPUModel = &MCSystemModel::getDefaultSystemModel(); + } } void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) { @@ -43,10 +48,12 @@ ArrayRef PF, ArrayRef PD, const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) + const InstrStage *IS, const unsigned *OC, const unsigned *FP, + const SubtargetInfoKV *SystemModelTable) : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD), ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL), - ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { + ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP), + SystemModels(SystemModelTable) { InitMCProcessorInfo(CPU, FS); } @@ -117,3 +124,207 @@ InstrItins = 
 InstrItineraryData(getSchedModel(), Stages, OperandCycles, ForwardingPaths); } + +const MCSystemModel & +MCSubtargetInfo::getSystemModelForCPU(StringRef CPU) const { + assert(SystemModels && "Processor execution engine not available!"); + + ArrayRef Models(SystemModels, ProcDesc.size()); + + assert(std::is_sorted(Models.begin(), Models.end(), + [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { + return strcmp(LHS.Key, RHS.Key) < 0; + }) && + "Processor system model table is not sorted"); + + // Find entry + auto Found = + std::lower_bound(Models.begin(), Models.end(), CPU); + if (Found == Models.end() || StringRef(Found->Key) != CPU) { + if (CPU != "help") // Don't error if the user asked for help. + errs() << "'" << CPU + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; + return MCSystemModel::getDefaultSystemModel(); + } + assert(Found->Value && "Missing processor SystemModel value"); + return *(const MCSystemModel *)Found->Value; +} + +Optional MCSubtargetInfo::getCacheSize(unsigned Level) const { + const MCSystemModel::CacheLevelSet *Levels = + getSystemModel().getCacheLevelInfo(Level); + + if (Levels == nullptr) { + return Optional(); + } + + Optional Size; + for (const auto *LevelInfo : *Levels) { + if (!Size) { + Size = Optional(LevelInfo->getSizeInBytes()); + continue; + } + if (LevelInfo->getSizeInBytes() != *Size) + // Caches at this level are of different sizes. 
+ return resolveCacheSize(Level, *Levels); + } + + return Size; +} + +Optional +MCSubtargetInfo::getCacheAssociativity(unsigned Level) const { + const MCSystemModel::CacheLevelSet *Levels = + getSystemModel().getCacheLevelInfo(Level); + + if (Levels == nullptr) { + return Optional(); + } + + Optional Associativity; + for (const auto *LevelInfo : *Levels) { + if (!Associativity) { + Associativity = Optional(LevelInfo->getAssociativity()); + continue; + } + if (LevelInfo->getAssociativity() != *Associativity) + // Caches at this level have different associativities. + return resolveCacheAssociativity(Level, *Levels); + } + + return Associativity; +} + +Optional MCSubtargetInfo::getCacheLineSize(unsigned Level) const { + const MCSystemModel::CacheLevelSet *Levels = + getSystemModel().getCacheLevelInfo(Level); + + if (Levels == nullptr) { + return Optional(); + } + + Optional Size; + for (const auto *LevelInfo : *Levels) { + if (!Size) { + Size = Optional(LevelInfo->getLineSizeInBytes()); + continue; + } + if (LevelInfo->getLineSizeInBytes() != *Size) + return resolveCacheLineSize(Level, *Levels); + } + + return Size; +} + +bool MCSubtargetInfo::prefetchReads() const { + Optional Enabled; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!Enabled) { + Enabled = Optional(PrefetcherConfig->isEnabledForReads()); + continue; + } + if (PrefetcherConfig->isEnabledForReads() != *Enabled) + return resolvePrefetchReads(PrefetcherConfigs); + } + + return Enabled ? 
*Enabled : false; +} + +bool MCSubtargetInfo::prefetchWrites() const { + Optional Enabled; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!Enabled) { + Enabled = Optional(PrefetcherConfig->isEnabledForWrites()); + continue; + } + if (PrefetcherConfig->isEnabledForWrites() != *Enabled) + return resolvePrefetchWrites(PrefetcherConfigs); + } + + return Enabled ? *Enabled : false; +} + +bool MCSubtargetInfo::useReadPrefetchForWrites() const { + Optional UseReadPFForWrites; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!UseReadPFForWrites) { + UseReadPFForWrites = + Optional(PrefetcherConfig->useReadPrefetchForWrites()); + continue; + } + if (PrefetcherConfig->useReadPrefetchForWrites() != *UseReadPFForWrites) + return + resolveUseReadPrefetchForWrites(PrefetcherConfigs); + } + + return UseReadPFForWrites ? *UseReadPFForWrites : false; +} + +unsigned MCSubtargetInfo::getPrefetchDistance() const { + Optional Distance; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!Distance) { + Distance = + Optional(PrefetcherConfig->getDistanceInInstructions()); + continue; + } + if (PrefetcherConfig->getDistanceInInstructions() != *Distance) + return resolvePrefetchDistanceInInstructions(PrefetcherConfigs); + } + + return Distance ? 
 *Distance : 0; +} + +unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const { + Optional Distance; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!Distance) { + Distance = + Optional(PrefetcherConfig->getMaxDistanceInIterations()); + continue; + } + if (PrefetcherConfig->getMaxDistanceInIterations() != *Distance) + return resolveMaxPrefetchIterationsAhead(PrefetcherConfigs); + } + + return Distance ? *Distance : 0; +} + +unsigned MCSubtargetInfo::getMinPrefetchStride() const { + Optional Distance; + + const MCSystemModel::PrefetchConfigSet &PrefetcherConfigs = + getSystemModel().getSoftwarePrefetcherInfo(); + + for (const auto *PrefetcherConfig : PrefetcherConfigs) { + if (!Distance) { + Distance = Optional(PrefetcherConfig->getMinByteStride()); + continue; + } + if (PrefetcherConfig->getMinByteStride() != *Distance) + return resolveMinPrefetchStride(PrefetcherConfigs); + } + + return Distance ? *Distance : 0; +} diff --git a/llvm/lib/MC/MCSystemModel.cpp b/llvm/lib/MC/MCSystemModel.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MC/MCSystemModel.cpp @@ -0,0 +1,153 @@ +//=== MC/MCSystemModel.cpp - Target System Model ------------*- C++ -*-=======// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines MCSystemModel methods. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSystemModel.h" +#include + +namespace llvm { + +MCWriteCombiningBufferInfo::~MCWriteCombiningBufferInfo() {} +MCSoftwarePrefetcherConfig::~MCSoftwarePrefetcherConfig() {} +MCCacheLevelInfo::~MCCacheLevelInfo() {} +MCMemoryModel::~MCMemoryModel() {} + +namespace { + + MCWriteCombiningBufferInfo DefaultWCBuffers(0, + "Default Write-Combining Buffer", + 0); + + MCSoftwarePrefetcherConfig DefaultPrefetcherConfig(0, + "Default Prefetcher", + false, + false, + false, + 0, + 0, + 0, + 0, + 0, + 0); + + const MCMemoryModel DefaultMemoryModel(0, + "Default Memory Model", + nullptr, + 0, + DefaultWCBuffers, + DefaultPrefetcherConfig); + +} // end anonymous namespace + + +MCExecutionResource::~MCExecutionResource() {} +MCSystemModel::~MCSystemModel() {} + +const MCSystemModel MCSystemModel::Default(0, + "Default System Model", + nullptr, + 0); + + +void MCSystemModel::initCacheInfoCache() const { + // Create a global cache topology. The tricky part is collapsing + // the execution resource levels properly. For example, let's say + // we have a system with a CPU socket and a GPU socket. The CPU + // socket contains two core types: big and little. The CPU socket + // contains an L3 cache, the big core contains an L2 and L1 cache + // and the little core contains an L1 cache. The GPU socket + // contains shared L2 and L3 caches and GPU cores have a private L1 + // cache: + // + // System + // / \ + // (L2, L3) GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + // + // We want the final topology to look like this: + // + // L3 (GPU) L3 (CPU) + // L2 (GPU) L2 (B) + // L1 (C) L1 (L) L1(B) + // + // The algorithm below recursively determines the topology for the + // resources below the current one, then merges the lists from all + // child resources so that it is no longer than the maximum size of + // any child list. 
Child lists are ordered by cache level, so the + // lowest level of cache appears first. + // + auto mergeCacheInfo = [](const CacheLevelInfo &I1, + const CacheLevelInfo &I2) -> CacheLevelInfo { + CacheLevelInfo Result; + unsigned Length = std::max(I1.size(), I2.size()); + for (unsigned i = 0; i < Length; ++i) { + Result.push_back(CacheLevelSet()); + if (i < I1.size()) + Result.back().append(I1[i].begin(), I1[i].end()); + if (i < I2.size()) + Result.back().append(I2[i].begin(), I2[i].end()); + } + + return Result; + }; + + std::function getCacheInfo = + [&](const MCExecutionResourceDesc &Desc) -> CacheLevelInfo { + const MCExecutionResource &Resource = Desc.getResource(); + + CacheLevelInfo Result; + for (const auto &ContainedDesc : Resource) { + Result = mergeCacheInfo(Result, getCacheInfo(ContainedDesc)); + } + + // Add cache information for this resource. + for (const auto &Level : Resource.getMemoryModel()) { + Result.push_back(CacheLevelSet()); + Result.back().push_back(&Level); + } + + return Result; + }; + + CacheLevels.clear(); + + for (const auto &Desc : *this) + CacheLevels = mergeCacheInfo(CacheLevels, getCacheInfo(Desc)); +} + +void MCSystemModel::initPrefetchConfigCache() const { + Prefetchers.clear(); + + using WorkListType = SmallVector; + WorkListType WorkList; + for (const auto &ResourceDesc : *this) { + WorkList.push_back(&ResourceDesc); + } + + while (!WorkList.empty()) { + const MCExecutionResourceDesc *Item = WorkList.back(); + WorkList.pop_back(); + const MCExecutionResource &Resource = Item->getResource(); + const MCSoftwarePrefetcherConfig &Prefetcher = + Resource.getMemoryModel().getSoftwarePrefetcherConfig(); + if (Prefetcher.isEnabledForReads() || Prefetcher.isEnabledForWrites()) { + Prefetchers.push_back(&Prefetcher); + } + for (const auto &ResourceDesc : Resource) { + WorkList.push_back(&ResourceDesc); + } + } +} + +} // end llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h 
b/llvm/lib/Target/AArch64/AArch64Subtarget.h --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -329,10 +329,10 @@ unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; } - unsigned getCacheLineSize() const { return CacheLineSize; } - unsigned getPrefetchDistance() const { return PrefetchDistance; } - unsigned getMinPrefetchStride() const { return MinPrefetchStride; } - unsigned getMaxPrefetchIterationsAhead() const { + unsigned getCacheLineSize() const override { return CacheLineSize; } + unsigned getPrefetchDistance() const override { return PrefetchDistance; } + unsigned getMinPrefetchStride() const override { return MinPrefetchStride; } + unsigned getMaxPrefetchIterationsAhead() const override { return MaxPrefetchIterationsAhead; } unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -153,14 +153,6 @@ shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader); - unsigned getCacheLineSize(); - - unsigned getPrefetchDistance(); - - unsigned getMinPrefetchStride(); - - unsigned getMaxPrefetchIterationsAhead(); - bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -877,22 +877,6 @@ return Considerable; } -unsigned AArch64TTIImpl::getCacheLineSize() { - return ST->getCacheLineSize(); -} - -unsigned AArch64TTIImpl::getPrefetchDistance() { - return ST->getPrefetchDistance(); -} - -unsigned 
AArch64TTIImpl::getMinPrefetchStride() { - return ST->getMinPrefetchStride(); -} - -unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { - return ST->getMaxPrefetchIterationsAhead(); -} - bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { assert(isa(Ty) && "Expected Ty to be a vector type"); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -38,9 +38,31 @@ #include "AMDGPUGenSubtargetInfo.inc" #define NoSchedModel NoSchedModelR600 +#define NoCaches NoCachesR600 +#define NoWCBuffers NoWCBuffersR600 +#define NoSoftwarePrefetcher NoSoftwarePrefetcherR600 +#define TransitionSoftwarePrefetcher TransitionSoftwarePrefetcherR600 +#define NoMemoryModel NoMemoryModelR600 +#define MinimalMemoryModel MinimalMemoryModelR600 +#define MinimalCore MinimalCoreR600 +#define MinimalCoreContained MinimalCoreContainedR600 +#define MinimalCoreResourceDesc MinimalCoreResourceDescR600 +#define MinimalSystemModelResources MinimalSystemModelResourcesR600 +#define MinimalSystemModel MinimalSystemModelR600 #define GET_SUBTARGETINFO_MC_DESC #include "R600GenSubtargetInfo.inc" #undef NoSchedModelR600 +#undef NoCachesR600 +#undef NoWCBuffersR600 +#undef NoSoftwarePrefetcherR600 +#undef TransitionSoftwarePrefetcherR600 +#undef NoMemoryModelR600 +#undef MinimalMemoryModelR600 +#undef MinimalCoreR600 +#undef MinimalCoreContainedR600 +#undef MinimalCoreResourceDescR600 +#undef MinimalSystemModelResourcesR600 +#undef MinimalSystemModelR600 #define GET_REGINFO_MC_DESC #include "AMDGPUGenRegisterInfo.inc" diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ 
b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -68,8 +68,8 @@ bool shouldFavorPostInc() const; // L1 cache prefetch. - unsigned getPrefetchDistance() const; - unsigned getCacheLineSize() const; + unsigned getPrefetchDistance() const override; + unsigned getCacheLineSize() const override; /// @} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -67,8 +67,8 @@ bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize(); - unsigned getPrefetchDistance(); + unsigned getCacheLineSize() const override; + unsigned getPrefetchDistance() const override; unsigned getMaxInterleaveFactor(unsigned VF); int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -274,7 +274,7 @@ } -unsigned PPCTTIImpl::getCacheLineSize() { +unsigned PPCTTIImpl::getCacheLineSize() const { // Check first if the user specified a custom line size. if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; @@ -289,7 +289,7 @@ return 64; } -unsigned PPCTTIImpl::getPrefetchDistance() { +unsigned PPCTTIImpl::getPrefetchDistance() const { // This seems like a reasonable default for the BG/Q (this pass is enabled, by // default, only on the BG/Q). 
return 300; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -59,9 +59,9 @@ unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize() { return 256; } - unsigned getPrefetchDistance() { return 2000; } - unsigned getMinPrefetchStride() { return 2048; } + unsigned getCacheLineSize() const override { return 256; } + unsigned getPrefetchDistance() const override { return 2000; } + unsigned getMinPrefetchStride() const override { return 2048; } bool hasDivRemOp(Type *DataType, bool IsSigned); bool prefersVectorizedAddressing() { return false; } diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -92,6 +92,20 @@ return TTI->getMaxPrefetchIterationsAhead(); } + bool prefetchReads() { + return TTI->prefetchReads(); + } + + bool prefetchWrites() { + if (PrefetchWrites.getNumOccurrences() > 0) + return PrefetchWrites; + return TTI->prefetchWrites(); + } + + bool useReadPrefetchForWrites() { + return TTI->useReadPrefetchForWrites(); + } + AssumptionCache *AC; LoopInfo *LI; ScalarEvolution *SE; @@ -254,10 +268,11 @@ Instruction *MemI; if (LoadInst *LMemI = dyn_cast(&I)) { + if (!prefetchReads()) continue; MemI = LMemI; PtrValue = LMemI->getPointerOperand(); } else if (StoreInst *SMemI = dyn_cast(&I)) { - if (!PrefetchWrites) continue; + if (!prefetchWrites()) continue; MemI = SMemI; PtrValue = SMemI->getPointerOperand(); } else continue; @@ -314,10 +329,13 @@ Type *I32 = Type::getInt32Ty(BB->getContext()); Function *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); + int PrefetchType = + MemI->mayReadFromMemory() ? 
0 : + useReadPrefetchForWrites() ? 0 : 1; Builder.CreateCall( PrefetchFunc, {PrefPtrValue, - ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), + ConstantInt::get(I32, PrefetchType), ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); ++NumPrefetches; LLVM_DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV diff --git a/llvm/test/TableGen/SystemModelEmitter.td b/llvm/test/TableGen/SystemModelEmitter.td new file mode 100644 --- /dev/null +++ b/llvm/test/TableGen/SystemModelEmitter.td @@ -0,0 +1,261 @@ +// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +// Define a thread. +def CoreThread : Thread; + +// Define a big core. +def BigCoreL1 : CacheLevel, + LineSize<64>, + Ways<2>, + Latency<3>>; + +def BigCoreL2 : CacheLevel, + LineSize<64>, + Ways<16>, + Latency<12>>; + +def BigCoreCacheHierarchy : CacheHierarchy<[BigCoreL1, BigCoreL2]>; + +def BigCoreWCBuffers : WriteCombiningBuffer<4>; + +def BigCoreSoftwarePrefetcher : SoftwarePrefetcher, + MaxByteDistance<640>, + InstructionDistance<200>, + MaxIterationDistance<4>, + MinByteStride<2048>>; + +def BigCoreMemoryModel : MemoryModel; + +def BigCoreThreadDesc : ExecutionResourceDesc; + +def BigCore : Core<[BigCoreThreadDesc], BigCoreMemoryModel>; + +// Define a little core. +def LittleCoreL1 : CacheLevel, + LineSize<32>, + Ways<2>, + Latency<3>>; + +def LittleCoreCacheHierarchy : CacheHierarchy<[LittleCoreL1]>; + +def LittleCoreWCBuffers : WriteCombiningBuffer<2>; + +def LittleCoreSoftwarePrefetcher : SoftwarePrefetcher, + MaxByteDistance<120>, + InstructionDistance<100>, + MaxIterationDistance<2>, + MinByteStride<512>>; + + +def LittleCoreMemoryModel : MemoryModel; + +def LittleCoreThreadDesc : ExecutionResourceDesc; + +def LittleCore : Core<[LittleCoreThreadDesc], LittleCoreMemoryModel>; + +// Define the socket-level memory model. 
+def SocketL3 : CacheLevel, + LineSize<64>, + Ways<16>, + Latency<33>>; + +def SocketCacheHierarchy : CacheHierarchy<[SocketL3]>; + +// Prefetching and write-combining are handled at the core level. +def SocketMemoryModel : MemoryModel; + +// Define an execution engine containing big cores. +def HomogeneousCoreDesc : ExecutionResourceDesc; + +def HomogeneousSocket : Socket<[HomogeneousCoreDesc], SocketMemoryModel>; + +def HomogeneousSocketDesc : ExecutionResourceDesc; + +def HomogeneousModel : SystemModel<[HomogeneousSocketDesc]>; + +// Define a socket containing big cores and little cores. +def HeterogeneousBigCoreDesc : ExecutionResourceDesc; +def HeterogeneousLittleCoreDesc : ExecutionResourceDesc; + +def HeterogeneousSocket : Socket<[HeterogeneousBigCoreDesc, + HeterogeneousLittleCoreDesc], + SocketMemoryModel>; + +def HeterogeneousSocketDesc : ExecutionResourceDesc; + +def HeterogeneousModel : SystemModel<[HeterogeneousSocketDesc]>; + + +def HomogeneousProc : Processor<"BigProc", NoItineraries, []> { + let System = HomogeneousModel; +} + +def HeterogeneousProc : Processor<"BigLittleProc", NoItineraries, []> { + let System = HeterogeneousModel; +} + +def MyProcDefault : Processor<"DefaultProc", NoItineraries, []>; + +def MyTarget : Target; + +// CHECK: // System models +// CHECK-NEXT: // =============================================================== +// CHECK-NEXT: // + +// CHECK: // Cache models +// CHECK-NEXT: // +// CHECK-NEXT: static const llvm::MCCacheLevelInfo BigCoreCacheHierarchy[] = { +// CHECK-NEXT: llvm::MCCacheLevelInfo(1, "BigCoreL1", 65536, 64, 2, 3), +// CHECK-NEXT: llvm::MCCacheLevelInfo(2, "BigCoreL2", 524288, 64, 16, 12) +// CHECK-NEXT: }; // BigCoreCacheHierarchy + +// CHECK: static const llvm::MCCacheLevelInfo LittleCoreCacheHierarchy[] = { +// CHECK-NEXT: llvm::MCCacheLevelInfo(3, "LittleCoreL1", 32768, 32, 2, 3) +// CHECK-NEXT: }; // LittleCoreCacheHierarchy + +// CHECK: static const llvm::MCCacheLevelInfo NoCaches[] = { +// CHECK-NEXT: 
llvm::MCCacheLevelInfo(0, "Empty", 0, 0, 0, 0) +// CHECK-NEXT: }; // NoCaches + +// CHECK: static const llvm::MCCacheLevelInfo SocketCacheHierarchy[] = { +// CHECK-NEXT: llvm::MCCacheLevelInfo(4, "SocketL3", 2097152, 64, 16, 33) +// CHECK-NEXT: }; // SocketCacheHierarchy + +// CHECK: // Write-combining buffers +// CHECK-NEXT: // +// CHECK-NEXT: static const llvm::MCWriteCombiningBufferInfo BigCoreWCBuffers(5, "BigCoreWCBuffers", 4); + +// CHECK: static const llvm::MCWriteCombiningBufferInfo LittleCoreWCBuffers(6, "LittleCoreWCBuffers", 2); + +// CHECK: static const llvm::MCWriteCombiningBufferInfo NoWCBuffers(7, "NoWCBuffers", 0); + +// CHECK: // Software prefetch configs +// CHECK-NEXT: // +// CHECK-NEXT: static const llvm::MCSoftwarePrefetcherConfig BigCoreSoftwarePrefetcher(8, "BigCoreSoftwarePrefetcher", 1, 0, 64, 640, 200, 4, 2048); + +// CHECK: static const llvm::MCSoftwarePrefetcherConfig LittleCoreSoftwarePrefetcher(9, "LittleCoreSoftwarePrefetcher", 1, 0, 12, 120, 100, 2, 512); + +// CHECK: static const llvm::MCSoftwarePrefetcherConfig NoSoftwarePrefetcher(10, "NoSoftwarePrefetcher", 0, 0, 0, 0, 0, 0, 0); + +// CHECK: static const llvm::MCSoftwarePrefetcherConfig TransitionSoftwarePrefetcher(11, "TransitionSoftwarePrefetcher", 1, 0, 0, 0, 0, 4294967295, 1); + +// CHECK: // Memory models +// CHECK-NEXT: // +// CHECK-NEXT: static const llvm::MCMemoryModel BigCoreMemoryModel(12, "BigCoreMemoryModel", BigCoreCacheHierarchy, 2, BigCoreWCBuffers, BigCoreSoftwarePrefetcher); + +// CHECK: static const llvm::MCMemoryModel LittleCoreMemoryModel(13, "LittleCoreMemoryModel", LittleCoreCacheHierarchy, 1, LittleCoreWCBuffers, LittleCoreSoftwarePrefetcher); + +// CHECK: static const llvm::MCMemoryModel MinimalMemoryModel(14, "MinimalMemoryModel", NoCaches, 0, NoWCBuffers, TransitionSoftwarePrefetcher); + +// CHECK: static const llvm::MCMemoryModel NoMemoryModel(15, "NoMemoryModel", NoCaches, 0, NoWCBuffers, NoSoftwarePrefetcher); + +// CHECK: static const 
llvm::MCMemoryModel SocketMemoryModel(16, "SocketMemoryModel", SocketCacheHierarchy, 1, NoWCBuffers, NoSoftwarePrefetcher); + +// CHECK: // System models +// CHECK-NEXT: // +// CHECK-NEXT: static const llvm::MCExecutionResourceDesc *CoreThreadContained[] = { +// CHECK-NEXT: nullptr +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource CoreThread(17, "CoreThread", CoreThreadContained, 0, NoMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc BigCoreThreadDesc(18, "BigCoreThreadDesc", &CoreThread, 4); + +// CHECK: static const llvm::MCExecutionResourceDesc *BigCoreContained[] = { +// CHECK-NEXT: &BigCoreThreadDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource BigCore(19, "BigCore", BigCoreContained, 1, BigCoreMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc HeterogeneousBigCoreDesc(20, "HeterogeneousBigCoreDesc", &BigCore, 2); + +// CHECK: static const llvm::MCExecutionResourceDesc LittleCoreThreadDesc(21, "LittleCoreThreadDesc", &CoreThread, 2); + +// CHECK: static const llvm::MCExecutionResourceDesc *LittleCoreContained[] = { +// CHECK-NEXT: &LittleCoreThreadDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource LittleCore(22, "LittleCore", LittleCoreContained, 1, LittleCoreMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc HeterogeneousLittleCoreDesc(23, "HeterogeneousLittleCoreDesc", &LittleCore, 16); + +// CHECK: static const llvm::MCExecutionResourceDesc *HeterogeneousSocketContained[] = { +// CHECK-NEXT: &HeterogeneousBigCoreDesc, +// CHECK-NEXT: &HeterogeneousLittleCoreDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource HeterogeneousSocket(24, "HeterogeneousSocket", HeterogeneousSocketContained, 2, SocketMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc HeterogeneousSocketDesc(25, "HeterogeneousSocketDesc", &HeterogeneousSocket, 1); + +// CHECK: static const llvm::MCExecutionResourceDesc 
*HeterogeneousModelResources[] = { +// CHECK-NEXT: &HeterogeneousSocketDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCSystemModel HeterogeneousModel(26, "HeterogeneousModel", HeterogeneousModelResources, 1); + +// CHECK: static const llvm::MCExecutionResourceDesc HomogeneousCoreDesc(26, "HomogeneousCoreDesc", &BigCore, 32); + +// CHECK: static const llvm::MCExecutionResourceDesc *HomogeneousSocketContained[] = { +// CHECK-NEXT: &HomogeneousCoreDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource HomogeneousSocket(27, "HomogeneousSocket", HomogeneousSocketContained, 1, SocketMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc HomogeneousSocketDesc(28, "HomogeneousSocketDesc", &HomogeneousSocket, 1); + +// CHECK: static const llvm::MCExecutionResourceDesc *HomogeneousModelResources[] = { +// CHECK-NEXT: &HomogeneousSocketDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCSystemModel HomogeneousModel(29, "HomogeneousModel", HomogeneousModelResources, 1); + +// CHECK: static const llvm::MCExecutionResourceDesc *MinimalCoreContained[] = { +// CHECK-NEXT: nullptr +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCExecutionResource MinimalCore(29, "MinimalCore", MinimalCoreContained, 0, MinimalMemoryModel); + +// CHECK: static const llvm::MCExecutionResourceDesc MinimalCoreResourceDesc(30, "MinimalCoreResourceDesc", &MinimalCore, 1); + +// CHECK: static const llvm::MCExecutionResourceDesc *MinimalSystemModelResources[] = { +// CHECK-NEXT: &MinimalCoreResourceDesc +// CHECK-NEXT: }; + +// CHECK: static const llvm::MCSystemModel MinimalSystemModel(31, "MinimalSystemModel", MinimalSystemModelResources, 1); + +// CHECK: // Sorted (by key) array of execution engine model for CPU subtype. 
+// CHECK-NEXT: extern const llvm::SubtargetInfoKV MyTargetProcSystemModelKV[] = { +// CHECK-NEXT: { "BigLittleProc", (const void *)&HeterogeneousModel }, +// CHECK-NEXT: { "BigProc", (const void *)&HomogeneousModel }, +// CHECK-NEXT: { "DefaultProc", (const void *)&MinimalSystemModel }, +// CHECK-NEXT: }; + +// CHECK: const SubtargetInfoKV *ProcSystem) : +// CHECK-NEXT: MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, +// CHECK-NEXT: WPR, WL, RA, IS, OC, FP, ProcSystem) { } + +// CHECK: static inline MCSubtargetInfo *createMyTargetMCSubtargetInfoImpl(const Triple &TT, StringRef CPU, StringRef FS) { +// CHECK-NEXT: return new MyTargetGenMCSubtargetInfo(TT, CPU, FS, None, MyTargetSubTypeKV, +// CHECK-NEXT: MyTargetProcSchedKV, MyTargetWriteProcResTable, MyTargetWriteLatencyTable, MyTargetReadAdvanceTable, +// CHECK-NEXT: nullptr, nullptr, nullptr, MyTargetProcSystemModelKV); +// CHECK-NEXT: } + +// CHECK: extern const llvm::SubtargetInfoKV MyTargetProcSystemModelKV[]; +// CHECK-NEXT: MyTargetGenSubtargetInfo::MyTargetGenSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) +// CHECK-NEXT: : TargetSubtargetInfo(TT, CPU, FS, None, makeArrayRef(MyTargetSubTypeKV, 3), +// CHECK-NEXT: MyTargetProcSchedKV, MyTargetWriteProcResTable, MyTargetWriteLatencyTable, MyTargetReadAdvanceTable, +// CHECK-NEXT: nullptr, nullptr, nullptr, +// CHECK-NEXT: MyTargetProcSystemModelKV) {} diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp --- a/llvm/unittests/CodeGen/MachineInstrTest.cpp +++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp @@ -47,7 +47,8 @@ public: BogusSubtarget(TargetMachine &TM) : TargetSubtargetInfo(Triple(""), "", "", {}, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr), + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr), FL(), TL(TM) {} ~BogusSubtarget() override {} diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt --- a/llvm/unittests/MC/CMakeLists.txt 
+++ b/llvm/unittests/MC/CMakeLists.txt @@ -9,5 +9,6 @@ Disassembler.cpp DwarfLineTables.cpp StringTableBuilderTest.cpp + SystemModel.cpp TargetRegistry.cpp ) diff --git a/llvm/unittests/MC/SystemModel.cpp b/llvm/unittests/MC/SystemModel.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/MC/SystemModel.cpp @@ -0,0 +1,1407 @@ +//===- unittests/MC/SystemModel.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSystemModel.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(SystemModel, Topology1Tests) { + // Test this topology: + // + // System + // / \ + // (L2, L3) GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + const unsigned GPUCoreL1 = 0; + + const unsigned CPUL3 = 0; + + const unsigned GPUL2 = 0; + const unsigned GPUL3 = 1; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. 
+ const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCoreCacheLevelNames[] = { "GPUCoreL1" }; + unsigned GPUCoreCacheLevelSizes[] = { 1024*32 }; + unsigned GPUCoreCacheLevelLineSizes[] = { 64 }; + unsigned GPUCoreCacheLevelAssociativities[] = { 8 }; + unsigned GPUCoreCacheLevelLatencies[] = { 2 }; + + const char *GPUCacheLevelNames[] = { "GPUL2", "GPUL3" }; + unsigned GPUCacheLevelSizes[] = { 1024*64, 1024*1024*2 }; + unsigned GPUCacheLevelLineSizes[] = { 64, 64 }; + unsigned GPUCacheLevelAssociativities[] = { 24, 32 }; + unsigned GPUCacheLevelLatencies[] = { 12, 50 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. + // The GPU has four cores with two thread team schedulers of vector + // length 64, for a total of 512 "threads." + const char *CPUCoreNames[] = { "BigCore", "LittleCore" }; + unsigned CPUCoreCounts[] = { 2, 8 }; + unsigned CPUThreadCounts[] = { 4, 2 }; + + const char *GPUCoreNames[] = { "GPUCore" }; + unsigned GPUCoreCounts[] = { 4 }; + // Threads in a core. The GPU has two thread team schedulers, each + // team may be a vector length of, say, 64 which we don't model. + unsigned GPUThreadCounts[] = { 2 }; + + // Define socket parameters. 
+ const char *SocketNames[] = { "CPU", "GPU" }; + unsigned CoreTypeCounts[] = { 2, 1 }; + + unsigned ID = 0; + + // Define write-combining buffers. + MCWriteCombiningBufferInfo WCBufs(ID++, "WCBufs", 4); + MCWriteCombiningBufferInfo NoWCBufs(ID++, "NoWCBufs", 0); + + // Define software prefetchers. + MCSoftwarePrefetcherConfig Prefetcher(ID++, "Prefetcher", true, 1024, 512, + 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig NoPrefetcher(ID++, "NoPrefetcher", false, 0, 0, 0, + 0, 0, 0); + + // Define caches. + MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1]), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1]), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // Each GPU core has a small cache. + MCCacheLevelInfo GPUCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCoreCacheLevelNames[GPUCoreL1], + GPUCoreCacheLevelSizes[GPUCoreL1], + GPUCoreCacheLevelLineSizes[GPUCoreL1], + GPUCoreCacheLevelAssociativities[GPUCoreL1], + GPUCoreCacheLevelLatencies[GPUCoreL1]), + }; + + // All GPU cores share two higher levels of cache. 
+ MCCacheLevelInfo GPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL2], + GPUCacheLevelSizes[GPUL2], + GPUCacheLevelLineSizes[GPUL2], + GPUCacheLevelAssociativities[GPUL2], + GPUCacheLevelLatencies[GPUL2]), + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL3], + GPUCacheLevelSizes[GPUL3], + GPUCacheLevelLineSizes[GPUL3], + GPUCacheLevelAssociativities[GPUL3], + GPUCacheLevelLatencies[GPUL3]), + }; + + // Define memory models. + MCMemoryModel BigMemModel(ID++, + "BigMemModel", + BigCoreCacheLevels, + 2, + WCBufs, + Prefetcher); + + MCMemoryModel LittleMemModel(ID++, + "LittleMemModel", + LittleCoreCacheLevels, + 1, + WCBufs, + Prefetcher); + + MCMemoryModel CPUMemModel(ID++, + "CPUMemModel", + CPUCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel GPUCoreMemModel(ID++, + "GPUCoreMemModel", + GPUCoreCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel GPUMemModel(ID++, + "GPUMemModel", + GPUCacheLevels, + 2, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel NoMemModel(ID++, "NoModel", nullptr, 0, NoWCBufs, NoPrefetcher); + + // Define threads. + MCExecutionResource CommonThread(ID++, ThreadName, nullptr, 0, NoMemModel); + + // Define cores. 
+ MCExecutionResourceDesc BigThreadDesc(ID++, + "BigThreadDesc", + &CommonThread, + CPUThreadCounts[Big]); + MCExecutionResourceDesc LittleThreadDesc(ID++, + "LittleThreadDesc", + &CommonThread, + CPUThreadCounts[Little]); + MCExecutionResourceDesc GPUThreadDesc(ID++, + "GPUThreadDesc", + &CommonThread, + GPUThreadCounts[GPU]); + + MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc }; + MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc }; + MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc }; + + MCExecutionResource BigCore(ID++, + CPUCoreNames[Big], + BigThreadsList, + 1, + BigMemModel); + + MCExecutionResource LittleCore(ID++, + CPUCoreNames[Little], + LittleThreadsList, + 1, + LittleMemModel); + + MCExecutionResource GPUCore(ID++, + GPUCoreNames[GPU], + GPUThreadsList, + 1, + GPUCoreMemModel); + + // Define sockets. + MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1, + GPUMemModel); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. 
+ EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u); + + unsigned s = 0; + for (const auto &SocketDesc : Node) { + EXPECT_EQ(SocketDesc.getNumResources(), 1u); + + const auto &Socket = SocketDesc.getResource(); + EXPECT_STREQ(Socket.getName(), SocketNames[s]); + EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(), + CoreTypeCounts[s]); + + unsigned *CoreCounts = (s == CPUSocket ? + CPUCoreCounts : GPUCoreCounts); + const char *const *CoreNames = (s == CPUSocket ? + CPUCoreNames : GPUCoreNames); + unsigned *ThreadCounts = (s == CPUSocket ? + CPUThreadCounts : GPUThreadCounts); + + unsigned c = 0; + for (const auto &CoreDesc : Socket) { + EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]); + + const auto &Core = CoreDesc.getResource(); + EXPECT_STREQ(Core.getName(), CoreNames[c]); + + EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u); + + const auto &ThreadDesc = Core.getResourceDescriptor(0); + EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]); + + const auto &Thread = ThreadDesc.getResource(); + EXPECT_STREQ(Thread.getName(), ThreadName); + + EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u); + + // Check thread-level caches. + unsigned NumThreadCacheLevels = + Thread.getMemoryModel().getNumCacheLevels(); + EXPECT_EQ(NumThreadCacheLevels, 0u); + + // Check core-level caches. + const char *const *CacheNames = (s == CPUSocket && + c == Big ? + BigCacheLevelNames : + (s == CPUSocket && + c == Little ? + LittleCacheLevelNames : + GPUCoreCacheLevelNames)); + const unsigned *CacheSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelSizes : + GPUCoreCacheLevelSizes)); + const unsigned *CacheLineSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelLineSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes)); + const unsigned *CacheAssociativities = (s == CPUSocket && + c == Big ? 
+ BigCacheLevelAssociativities : + (s == CPUSocket && + c == Little ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities) + ); + const unsigned *CacheLatencies = (s == CPUSocket && + c == Big ? + BigCacheLevelLatencies : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies)); + + unsigned lvl = 0; + for (const auto &CacheLevel : Core.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++c; + } + + // Check socket-level caches. + const char *const *CacheNames = (s == CPUSocket ? + CPUCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (s == CPUSocket ? + CPUCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (s == CPUSocket ? + CPUCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (s == CPUSocket ? + CPUCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (s == CPUSocket ? + CPUCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned lvl = 0; + for (const auto &CacheLevel : Socket.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++s; + } + + // Test the global system representation of the memory model. 
+ const MCSystemModel::CacheLevelSet *L1Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1); + const MCSystemModel::CacheLevelSet *L2Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2); + const MCSystemModel::CacheLevelSet *L3Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3); + const MCSystemModel::CacheLevelSet *L4Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L4); + + const MCSystemModel::PrefetchConfigSet &PrefetchConfigs = + Node.getSoftwarePrefetcherInfo(); + + EXPECT_NE(L1Levels, nullptr); + EXPECT_NE(L2Levels, nullptr); + EXPECT_NE(L3Levels, nullptr); + EXPECT_EQ(L4Levels, nullptr); + + EXPECT_EQ(L1Levels->size(), 3u); + EXPECT_EQ(L2Levels->size(), 2u); + EXPECT_EQ(L3Levels->size(), 2u); + EXPECT_EQ(PrefetchConfigs.size(), 2u); + + unsigned i = 0; + for (const auto L1Level : *L1Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + i == 1 ? LittleCacheLevelNames : + GPUCoreCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + i == 1 ? LittleCacheLevelSizes : + GPUCoreCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + i == 1 ? LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes + ); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + i == 1 ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + i == 1 ? LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies + ); + + unsigned Index = (i == 0 ? BigL1 : + i == 1 ? 
LittleL1 : GPUCoreL1); + + EXPECT_STREQ(L1Level->getName(), CacheNames[Index]); + EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L2Level : *L2Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned Index = (i == 0 ? BigL2 : GPUL2); + + EXPECT_STREQ(L2Level->getName(), CacheNames[Index]); + EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L3Level : *L3Levels) { + const char *const *CacheNames = (i == 0 ? CPUCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? CPUCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? CPUCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (i == 0 ? + CPUCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? CPUCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned Index = (i == 0 ? 
CPUL3 : GPUL3); + + EXPECT_STREQ(L3Level->getName(), CacheNames[Index]); + EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]); + + ++i; + } +} + +TEST(SystemModel, Topology2Tests) { + // Test this topology: + // + // System + // / \ + // GPU CPU (L3) + // | / \ + // (L1) C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + const unsigned GPUCoreL1 = 0; + + const unsigned CPUL3 = 0; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. + const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCoreCacheLevelNames[] = { "GPUCoreL1" }; + unsigned GPUCoreCacheLevelSizes[] = { 1024*32 }; + unsigned GPUCoreCacheLevelLineSizes[] = { 64 }; + unsigned GPUCoreCacheLevelAssociativities[] = { 8 }; + unsigned GPUCoreCacheLevelLatencies[] = { 2 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. 
+ // The GPU has four cores with two thread team schedulers of vector + // length 64, for a total of 512 "threads." + const char *CPUCoreNames[] = { "BigCore", "LittleCore" }; + unsigned CPUCoreCounts[] = { 2, 8 }; + unsigned CPUThreadCounts[] = { 4, 2 }; + + const char *GPUCoreNames[] = { "GPUCore" }; + unsigned GPUCoreCounts[] = { 4 }; + // Threads in a core. The GPU has two thread team schedulers, each + // team may be a vector length of, say, 64 which we don't model. + unsigned GPUThreadCounts[] = { 2 }; + + // Define socket parameters. + const char *SocketNames[] = { "CPU", "GPU" }; + unsigned CoreTypeCounts[] = { 2, 1 }; + + unsigned ID = 0; + + // Define write-combining buffers. + MCWriteCombiningBufferInfo WCBufs(ID++, "WCBufs", 4); + MCWriteCombiningBufferInfo NoWCBufs(ID++, "NoWCBufs", 0); + + // Define software prefetchers. + MCSoftwarePrefetcherConfig Prefetcher(ID++, "Prefetcher", true, 1024, 512, + 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig NoPrefetcher(ID++, "NoPrefetcher", false, 0, 0, 0, + 0, 0, 0); + + // Define caches. 
+ MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1]), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1]), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // Each GPU core has a small cache. + MCCacheLevelInfo GPUCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCoreCacheLevelNames[GPUCoreL1], + GPUCoreCacheLevelSizes[GPUCoreL1], + GPUCoreCacheLevelLineSizes[GPUCoreL1], + GPUCoreCacheLevelAssociativities[GPUCoreL1], + GPUCoreCacheLevelLatencies[GPUCoreL1]), + }; + + // Define memory models. + MCMemoryModel BigMemModel(ID++, + "BigMemModel", + BigCoreCacheLevels, + 2, + WCBufs, + Prefetcher); + + MCMemoryModel LittleMemModel(ID++, + "LittleMemModel", + LittleCoreCacheLevels, + 1, + WCBufs, + Prefetcher); + + MCMemoryModel CPUMemModel(ID++, + "CPUMemModel", + CPUCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel GPUCoreMemModel(ID++, + "GPUCoreMemModel", + GPUCoreCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel NoMemModel(ID++, "NoModel", nullptr, 0, NoWCBufs, NoPrefetcher); + + // Define threads. + MCExecutionResource CommonThread(ID++, ThreadName, nullptr, 0, NoMemModel); + + // Define cores. 
+ MCExecutionResourceDesc BigThreadDesc(ID++, + "BigThreadDesc", + &CommonThread, + CPUThreadCounts[Big]); + MCExecutionResourceDesc LittleThreadDesc(ID++, + "LittleThreadDesc", + &CommonThread, + CPUThreadCounts[Little]); + MCExecutionResourceDesc GPUThreadDesc(ID++, + "GPUThreadDesc", + &CommonThread, + GPUThreadCounts[GPU]); + + MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc }; + MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc }; + MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc }; + + MCExecutionResource BigCore(ID++, + CPUCoreNames[Big], + BigThreadsList, + 1, + BigMemModel); + + MCExecutionResource LittleCore(ID++, + CPUCoreNames[Little], + LittleThreadsList, + 1, + LittleMemModel); + + MCExecutionResource GPUCore(ID++, + GPUCoreNames[GPU], + GPUThreadsList, + 1, + GPUCoreMemModel); + + // Define sockets. + MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1, + NoMemModel); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. 
+ EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u); + + unsigned s = 0; + for (const auto &SocketDesc : Node) { + EXPECT_EQ(SocketDesc.getNumResources(), 1u); + + const auto &Socket = SocketDesc.getResource(); + EXPECT_STREQ(Socket.getName(), SocketNames[s]); + EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(), + CoreTypeCounts[s]); + + unsigned *CoreCounts = (s == 0 ? CPUCoreCounts : GPUCoreCounts); + const char *const *CoreNames = (s == 0 ? CPUCoreNames : GPUCoreNames); + unsigned *ThreadCounts = (s == 0 ? CPUThreadCounts : GPUThreadCounts); + + unsigned c = 0; + for (const auto &CoreDesc : Socket) { + EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]); + + const auto &Core = CoreDesc.getResource(); + EXPECT_STREQ(Core.getName(), CoreNames[c]); + + EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u); + + const auto &ThreadDesc = Core.getResourceDescriptor(0); + EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]); + + const auto &Thread = ThreadDesc.getResource(); + EXPECT_STREQ(Thread.getName(), ThreadName); + + EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u); + + // Check thread-level caches. + unsigned NumThreadCacheLevels = + Thread.getMemoryModel().getNumCacheLevels(); + EXPECT_EQ(NumThreadCacheLevels, 0u); + + // Check core-level caches. + const char *const *CacheNames = (s == CPUSocket && + c == Big ? + BigCacheLevelNames : + (s == CPUSocket && + c == Little ? + LittleCacheLevelNames : + GPUCoreCacheLevelNames)); + const unsigned *CacheSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelSizes : + GPUCoreCacheLevelSizes)); + const unsigned *CacheLineSizes = (s == CPUSocket && + c == Big ? + BigCacheLevelLineSizes : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes)); + const unsigned *CacheAssociativities = (s == CPUSocket && + c == Big ? + BigCacheLevelAssociativities : + (s == CPUSocket && + c == Little ? 
+ LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities) + ); + const unsigned *CacheLatencies = (s == CPUSocket && + c == Big ? + BigCacheLevelLatencies : + (s == CPUSocket && + c == Little ? + LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies)); + + unsigned lvl = 0; + for (const auto &CacheLevel : Core.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++c; + } + + // Check socket-level caches. + const char *const *CacheNames = CPUCacheLevelNames; + const unsigned *CacheSizes = CPUCacheLevelSizes; + const unsigned *CacheLineSizes = CPUCacheLevelLineSizes; + const unsigned *CacheAssociativities = CPUCacheLevelAssociativities; + const unsigned *CacheLatencies = CPUCacheLevelLatencies; + + if (s == GPUSocket) { + unsigned NumSocketCacheLevels = + Socket.getMemoryModel().getNumCacheLevels(); + EXPECT_EQ(NumSocketCacheLevels, 0u); + } + + unsigned lvl = 0; + for (const auto &CacheLevel : Socket.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++s; + } + + // Test the global system representation of the memory model. 
+ const MCSystemModel::CacheLevelSet *L1Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1); + const MCSystemModel::CacheLevelSet *L2Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2); + const MCSystemModel::CacheLevelSet *L3Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3); + const MCSystemModel::CacheLevelSet *L4Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L4); + + const MCSystemModel::PrefetchConfigSet &PrefetchConfigs = + Node.getSoftwarePrefetcherInfo(); + + EXPECT_NE(L1Levels, nullptr); + EXPECT_NE(L2Levels, nullptr); + EXPECT_NE(L3Levels, nullptr); + EXPECT_EQ(L4Levels, nullptr); + + EXPECT_EQ(L1Levels->size(), 3u); + EXPECT_EQ(L2Levels->size(), 1u); + EXPECT_EQ(L3Levels->size(), 1u); + EXPECT_EQ(PrefetchConfigs.size(), 2u); + + unsigned i = 0; + for (const auto L1Level : *L1Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + i == 1 ? LittleCacheLevelNames : + GPUCoreCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? BigCacheLevelSizes : + i == 1 ? LittleCacheLevelSizes : + GPUCoreCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + i == 1 ? LittleCacheLevelLineSizes : + GPUCoreCacheLevelLineSizes + ); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + i == 1 ? + LittleCacheLevelAssociativities : + GPUCoreCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + i == 1 ? LittleCacheLevelLatencies : + GPUCoreCacheLevelLatencies + ); + + unsigned Index = (i == 0 ? BigL1 : + i == 1 ? 
LittleL1 : GPUCoreL1); + + EXPECT_STREQ(L1Level->getName(), CacheNames[Index]); + EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L2Level : *L2Levels) { + const char *const *CacheNames = BigCacheLevelNames; + const unsigned *CacheSizes = BigCacheLevelSizes; + const unsigned *CacheLineSizes = BigCacheLevelLineSizes; + const unsigned *CacheAssociativities = BigCacheLevelAssociativities; + const unsigned *CacheLatencies = BigCacheLevelLatencies; + + unsigned Index = BigL2; + + EXPECT_STREQ(L2Level->getName(), CacheNames[Index]); + EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L3Level : *L3Levels) { + const char *const *CacheNames = CPUCacheLevelNames; + const unsigned *CacheSizes = CPUCacheLevelSizes; + const unsigned *CacheLineSizes = CPUCacheLevelLineSizes; + const unsigned *CacheAssociativities = CPUCacheLevelAssociativities; + const unsigned *CacheLatencies = CPUCacheLevelLatencies; + + unsigned Index = CPUL3; + + EXPECT_STREQ(L3Level->getName(), CacheNames[Index]); + EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]); + + ++i; + } +} + +TEST(SystemModel, Topology3Tests) { + // Test this topology: + // + // System + // / \ + // (L1) GPU CPU (L3) + // | / \ + // C L (L1) B (L1, L2) + + const unsigned BigL1 = 0; + const unsigned BigL2 = 1; + + const unsigned LittleL1 = 0; + + 
const unsigned CPUL3 = 0; + + const unsigned GPUL1 = 0; + + const unsigned Big = 0; + const unsigned Little = 1; + + const unsigned GPU = 0; + + const unsigned CPUSocket = 0; + const unsigned GPUSocket = 1; + + // Define cache parameters. + const char *BigCacheLevelNames[] = { "BigL1", "BigL2" }; + unsigned BigCacheLevelSizes[] = { 1024*16, 1024 * 1024*4 }; + unsigned BigCacheLevelLineSizes[] = { 32, 32 }; + unsigned BigCacheLevelAssociativities[] = { 8, 24 }; + unsigned BigCacheLevelLatencies[] = { 2, 12 }; + + const char *LittleCacheLevelNames[] = { "LittleL1" }; + unsigned LittleCacheLevelSizes[] = { 1024*8 }; + unsigned LittleCacheLevelLineSizes[] = { 32 }; + unsigned LittleCacheLevelAssociativities[] = { 8 }; + unsigned LittleCacheLevelLatencies[] = { 2 }; + + const char *CPUCacheLevelNames[] = { "CPUL3" }; + unsigned CPUCacheLevelSizes[] = { 1024*1024*8 }; + unsigned CPUCacheLevelLineSizes[] = { 32 }; + unsigned CPUCacheLevelAssociativities[] = { 32 }; + unsigned CPUCacheLevelLatencies[] = { 50 }; + + const char *GPUCacheLevelNames[] = { "GPUL1" }; + unsigned GPUCacheLevelSizes[] = { 1024*64 }; + unsigned GPUCacheLevelLineSizes[] = { 64 }; + unsigned GPUCacheLevelAssociativities[] = { 24 }; + unsigned GPUCacheLevelLatencies[] = { 12 }; + + // Define thread parameters. + const char *ThreadName = "Thread"; + + // Define core parameters. + // The GPU has four cores with two thread team schedulers of vector + // length 64, for a total of 512 "threads." + const char *CPUCoreNames[] = { "BigCore", "LittleCore" }; + unsigned CPUCoreCounts[] = { 2, 8 }; + unsigned CPUThreadCounts[] = { 4, 2 }; + + const char *GPUCoreNames[] = { "GPUCore" }; + unsigned GPUCoreCounts[] = { 4 }; + // Threads in a core. The GPU has two thread team schedulers, each + // team may be a vector length of, say, 64 which we don't model. + unsigned GPUThreadCounts[] = { 2 }; + + // Define socket parameters. 
+ const char *SocketNames[] = { "CPU", "GPU" }; + unsigned CoreTypeCounts[] = { 2, 1 }; + + unsigned ID = 0; + + // Define write-combining buffers. + MCWriteCombiningBufferInfo WCBufs(ID++, "WCBufs", 4); + MCWriteCombiningBufferInfo NoWCBufs(ID++, "NoWCBufs", 0); + + // Define software prefetchers. + MCSoftwarePrefetcherConfig Prefetcher(ID++, "Prefetcher", true, 1024, 512, + 4096, 100, 4, 32); + + MCSoftwarePrefetcherConfig NoPrefetcher(ID++, "NoPrefetcher", false, 0, 0, 0, + 0, 0, 0); + + // Define caches. + MCCacheLevelInfo BigCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL1], + BigCacheLevelSizes[BigL1], + BigCacheLevelLineSizes[BigL1], + BigCacheLevelAssociativities[BigL1], + BigCacheLevelLatencies[BigL1]), + MCCacheLevelInfo(ID++, + BigCacheLevelNames[BigL2], + BigCacheLevelSizes[BigL2], + BigCacheLevelLineSizes[BigL2], + BigCacheLevelAssociativities[BigL2], + BigCacheLevelLatencies[BigL2]), + }; + + MCCacheLevelInfo LittleCoreCacheLevels[] = { + MCCacheLevelInfo(ID++, + LittleCacheLevelNames[LittleL1], + LittleCacheLevelSizes[LittleL1], + LittleCacheLevelLineSizes[LittleL1], + LittleCacheLevelAssociativities[LittleL1], + LittleCacheLevelLatencies[LittleL1]), + }; + + MCCacheLevelInfo CPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + CPUCacheLevelNames[CPUL3], + CPUCacheLevelSizes[CPUL3], + CPUCacheLevelLineSizes[CPUL3], + CPUCacheLevelAssociativities[CPUL3], + CPUCacheLevelLatencies[CPUL3]), + }; + + // All GPU cores share one level of cache. + MCCacheLevelInfo GPUCacheLevels[] = { + MCCacheLevelInfo(ID++, + GPUCacheLevelNames[GPUL1], + GPUCacheLevelSizes[GPUL1], + GPUCacheLevelLineSizes[GPUL1], + GPUCacheLevelAssociativities[GPUL1], + GPUCacheLevelLatencies[GPUL1]), + }; + + // Define memory models. 
+ MCMemoryModel BigMemModel(ID++, + "BigMemModel", + BigCoreCacheLevels, + 2, + WCBufs, + Prefetcher); + + MCMemoryModel LittleMemModel(ID++, + "LittleMemModel", + LittleCoreCacheLevels, + 1, + WCBufs, + Prefetcher); + + MCMemoryModel CPUMemModel(ID++, + "CPUMemModel", + CPUCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel GPUMemModel(ID++, + "GPUMemModel", + GPUCacheLevels, + 1, + NoWCBufs, + NoPrefetcher); + + MCMemoryModel NoMemModel(ID++, "NoModel", nullptr, 0, NoWCBufs, NoPrefetcher); + + // Define threads. + MCExecutionResource CommonThread(ID++, ThreadName, nullptr, 0, NoMemModel); + + // Define cores. + MCExecutionResourceDesc BigThreadDesc(ID++, + "BigThreadDesc", + &CommonThread, + CPUThreadCounts[Big]); + MCExecutionResourceDesc LittleThreadDesc(ID++, + "LittleThreadDesc", + &CommonThread, + CPUThreadCounts[Little]); + MCExecutionResourceDesc GPUThreadDesc(ID++, + "GPUThreadDesc", + &CommonThread, + GPUThreadCounts[GPU]); + + MCExecutionResourceDesc *BigThreadsList[] = { &BigThreadDesc }; + MCExecutionResourceDesc *LittleThreadsList[] = { &LittleThreadDesc }; + MCExecutionResourceDesc *GPUThreadsList[] = { &GPUThreadDesc }; + + MCExecutionResource BigCore(ID++, + CPUCoreNames[Big], + BigThreadsList, + 1, + BigMemModel); + + MCExecutionResource LittleCore(ID++, + CPUCoreNames[Little], + LittleThreadsList, + 1, + LittleMemModel); + + MCExecutionResource GPUCore(ID++, + GPUCoreNames[GPU], + GPUThreadsList, + 1, + NoMemModel); + + // Define sockets. 
+ MCExecutionResourceDesc BigCoreDesc(ID++, + "BigCoreDesc", + &BigCore, + CPUCoreCounts[Big]); + + MCExecutionResourceDesc LittleCoreDesc(ID++, + "LittleCoreDesc", + &LittleCore, + CPUCoreCounts[Little]); + + MCExecutionResourceDesc GPUCoreDesc(ID++, + "GPUCoreDesc", + &GPUCore, + GPUCoreCounts[GPU]); + + MCExecutionResourceDesc *CPUCoreList[] = { &BigCoreDesc, &LittleCoreDesc }; + MCExecutionResourceDesc *GPUCoreList[] = { &GPUCoreDesc }; + + MCExecutionResource CPUEngine(ID++, + SocketNames[CPUSocket], + CPUCoreList, + 2, + CPUMemModel); + + MCExecutionResource GPUEngine(ID++, + SocketNames[GPUSocket], + GPUCoreList, + 1, + GPUMemModel); + + // Define a node consisting of a CPU socket and a GPU socket. + MCExecutionResourceDesc CPUSocketDesc(ID++, "CPUSocketDesc", &CPUEngine, 1); + MCExecutionResourceDesc GPUSocketDesc(ID++, "GPUSocketDesc", &GPUEngine, 1); + + MCExecutionResourceDesc *SocketList[] = { &CPUSocketDesc, &GPUSocketDesc }; + + MCSystemModel Node(ID++, "Node", SocketList, 2); + + // Test the topology. + EXPECT_EQ(Node.getNumExecutionResourceTypes(), 2u); + + unsigned s = 0; + for (const auto &SocketDesc : Node) { + EXPECT_EQ(SocketDesc.getNumResources(), 1u); + + const auto &Socket = SocketDesc.getResource(); + EXPECT_STREQ(Socket.getName(), SocketNames[s]); + EXPECT_EQ(Socket.getNumContainedExecutionResourceTypes(), + CoreTypeCounts[s]); + + unsigned *CoreCounts = (s == CPUSocket ? + CPUCoreCounts : GPUCoreCounts); + const char *const *CoreNames = (s == CPUSocket ? + CPUCoreNames : GPUCoreNames); + unsigned *ThreadCounts = (s == CPUSocket ? 
+ CPUThreadCounts : GPUThreadCounts); + + unsigned c = 0; + for (const auto &CoreDesc : Socket) { + EXPECT_EQ(CoreDesc.getNumResources(), CoreCounts[c]); + + const auto &Core = CoreDesc.getResource(); + EXPECT_STREQ(Core.getName(), CoreNames[c]); + + EXPECT_EQ(Core.getNumContainedExecutionResourceTypes(), 1u); + + const auto &ThreadDesc = Core.getResourceDescriptor(0); + EXPECT_EQ(ThreadDesc.getNumResources(), ThreadCounts[c]); + + const auto &Thread = ThreadDesc.getResource(); + EXPECT_STREQ(Thread.getName(), ThreadName); + + EXPECT_EQ(Thread.getNumContainedExecutionResourceTypes(), 0u); + + // Check thread-level caches. + unsigned NumThreadCacheLevels = + Thread.getMemoryModel().getNumCacheLevels(); + EXPECT_EQ(NumThreadCacheLevels, 0u); + + // Check core-level caches. + if (s == GPUSocket) { + unsigned NumCoreCacheLevels = + Thread.getMemoryModel().getNumCacheLevels(); + EXPECT_EQ(NumCoreCacheLevels, 0u); + } + + const char *const *CacheNames = (c == Big ? + BigCacheLevelNames : + LittleCacheLevelNames); + const unsigned *CacheSizes = (c == Big ? + BigCacheLevelSizes : + LittleCacheLevelSizes); + const unsigned *CacheLineSizes = (c == Big ? + BigCacheLevelLineSizes : + LittleCacheLevelLineSizes); + const unsigned *CacheAssociativities = (c == Big ? + BigCacheLevelAssociativities : + LittleCacheLevelAssociativities); + const unsigned *CacheLatencies = (c == Big ? + BigCacheLevelLatencies : + LittleCacheLevelLatencies); + + unsigned lvl = 0; + for (const auto &CacheLevel : Core.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++c; + } + + // Check socket-level caches. + const char *const *CacheNames = (s == CPUSocket ? 
+ CPUCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (s == CPUSocket ? + CPUCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (s == CPUSocket ? + CPUCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (s == CPUSocket ? + CPUCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (s == CPUSocket ? + CPUCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned lvl = 0; + for (const auto &CacheLevel : Socket.getMemoryModel()) { + EXPECT_STREQ(CacheLevel.getName(), CacheNames[lvl]); + EXPECT_EQ(CacheLevel.getSizeInBytes(), CacheSizes[lvl]); + EXPECT_EQ(CacheLevel.getLineSizeInBytes(), CacheLineSizes[lvl]); + EXPECT_EQ(CacheLevel.getAssociativity(), CacheAssociativities[lvl]); + EXPECT_EQ(CacheLevel.getLatency(), CacheLatencies[lvl]); + + ++lvl; + } + + ++s; + } + + // Test the global system representation of the memory model. + const MCSystemModel::CacheLevelSet *L1Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L1); + const MCSystemModel::CacheLevelSet *L2Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L2); + const MCSystemModel::CacheLevelSet *L3Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L3); + const MCSystemModel::CacheLevelSet *L4Levels = + Node.getCacheLevelInfo(MCSystemModel::CacheLevel::L4); + + const MCSystemModel::PrefetchConfigSet &PrefetchConfigs = + Node.getSoftwarePrefetcherInfo(); + + EXPECT_NE(L1Levels, nullptr); + EXPECT_NE(L2Levels, nullptr); + EXPECT_NE(L3Levels, nullptr); + EXPECT_EQ(L4Levels, nullptr); + + EXPECT_EQ(L1Levels->size(), 3u); + EXPECT_EQ(L2Levels->size(), 1u); + EXPECT_EQ(L3Levels->size(), 1u); + EXPECT_EQ(PrefetchConfigs.size(), 2u); + + unsigned i = 0; + for (const auto L1Level : *L1Levels) { + const char *const *CacheNames = (i == 0 ? BigCacheLevelNames : + i == 1 ? LittleCacheLevelNames : + GPUCacheLevelNames); + const unsigned *CacheSizes = (i == 0 ? 
BigCacheLevelSizes : + i == 1 ? LittleCacheLevelSizes : + GPUCacheLevelSizes); + const unsigned *CacheLineSizes = (i == 0 ? BigCacheLevelLineSizes : + i == 1 ? LittleCacheLevelLineSizes : + GPUCacheLevelLineSizes); + const unsigned *CacheAssociativities = (i == 0 ? + BigCacheLevelAssociativities : + i == 1 ? + LittleCacheLevelAssociativities : + GPUCacheLevelAssociativities); + const unsigned *CacheLatencies = (i == 0 ? BigCacheLevelLatencies : + i == 1 ? LittleCacheLevelLatencies : + GPUCacheLevelLatencies); + + unsigned Index = (i == 0 ? BigL1 : + i == 1 ? LittleL1 : GPUL1); + + EXPECT_STREQ(L1Level->getName(), CacheNames[Index]); + EXPECT_EQ(L1Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L1Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L1Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L1Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L2Level : *L2Levels) { + const char *const *CacheNames = BigCacheLevelNames; + const unsigned *CacheSizes = BigCacheLevelSizes; + const unsigned *CacheLineSizes = BigCacheLevelLineSizes; + const unsigned *CacheAssociativities = BigCacheLevelAssociativities; + const unsigned *CacheLatencies = BigCacheLevelLatencies; + + unsigned Index = BigL2; + + EXPECT_STREQ(L2Level->getName(), CacheNames[Index]); + EXPECT_EQ(L2Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L2Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L2Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L2Level->getLatency(), CacheLatencies[Index]); + + ++i; + } + + i = 0; + for (const auto L3Level : *L3Levels) { + const char *const *CacheNames = CPUCacheLevelNames; + const unsigned *CacheSizes = CPUCacheLevelSizes; + const unsigned *CacheLineSizes = CPUCacheLevelLineSizes; + const unsigned *CacheAssociativities = CPUCacheLevelAssociativities; + const unsigned *CacheLatencies = CPUCacheLevelLatencies; + + unsigned Index = CPUL3; + + 
EXPECT_STREQ(L3Level->getName(), CacheNames[Index]); + EXPECT_EQ(L3Level->getSizeInBytes(), CacheSizes[Index]); + EXPECT_EQ(L3Level->getLineSizeInBytes(), CacheLineSizes[Index]); + EXPECT_EQ(L3Level->getAssociativity(), CacheAssociativities[Index]); + EXPECT_EQ(L3Level->getLatency(), CacheLatencies[Index]); + + ++i; + } +} + +} // end namespace diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -121,6 +122,28 @@ void EmitSchedModel(raw_ostream &OS); void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS); + + void EmitCacheHierarchies(raw_ostream &OS, unsigned &ID); + void EmitWriteCombiningBuffers(raw_ostream &OS, unsigned &ID); + void EmitPrefetchConfigs(raw_ostream &OS, unsigned &ID); + void EmitMemoryModel(raw_ostream &OS, Record &M, unsigned &ID); + void EmitMemoryModels(raw_ostream &OS, unsigned &ID); + void EmitExecutionResource(raw_ostream &OS, Record &Resource, unsigned &ID, + SmallPtrSetImpl &Emitted); + void EmitExecutionResourceDesc(raw_ostream &OS, Record &ResourceDesc, + unsigned &ID, + SmallPtrSetImpl &Emitted); + void EmitExecutionResourceDescResource(raw_ostream &OS, Record &ResourceDesc, + unsigned &ID, + SmallPtrSetImpl &Emitted); + void EmitExecutionResourceList(raw_ostream &OS, StringRef ListName, + const ListInit &ResourceList, + unsigned &ID, + SmallPtrSetImpl &Emitted); + void EmitSystemModel(raw_ostream &OS, Record &E, unsigned &ID, + SmallPtrSetImpl &Emitted); + void EmitSystemModels(raw_ostream &OS); + void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures, unsigned NumProcs); @@ -1680,6 +1703,359 @@ OS << " return 0;\n}\n"; } +// EmitCacheHierarchies - Emits all cache hierarchy information. 
+//
+void SubtargetEmitter::EmitCacheHierarchies(raw_ostream &OS, unsigned &ID) {
+  std::vector<Record *> CacheHierarchies =
+    Records.getAllDerivedDefinitions("CacheHierarchy");
+
+  for (const auto &CH : CacheHierarchies) {
+    RecordVal *Levels = CH->getValue("Levels");
+    const ListInit &LevelList = *cast<ListInit>(Levels->getValue());
+
+    // Emits "" before the first array element and "," before each
+    // subsequent one by swapping the callable after the first call.
+    std::function<const char *()> Delimiter = [&]() -> const char * {
+      Delimiter = []() -> const char * {
+        return ",";
+      };
+      return "";
+    };
+
+    OS << "static const llvm::MCCacheLevelInfo " << CH->getName() << "[] = {";
+    for (const auto &L : LevelList) {
+      OS << Delimiter() << '\n';
+
+      const DefInit &LevelDef = *cast<DefInit>(L);
+      Record *Level = LevelDef.getDef();
+
+      RecordVal *LevelSize = Level->getValue("Size");
+      RecordVal *LevelLineSize = Level->getValue("LineSize");
+      RecordVal *LevelWays = Level->getValue("Ways");
+      RecordVal *LevelLatency = Level->getValue("Latency");
+
+      OS << "  llvm::MCCacheLevelInfo("
+         << ID++ << ", \""
+         << Level->getName() << "\", "
+         << LevelSize->getValue()->getAsString() << ", "
+         << LevelLineSize->getValue()->getAsString() << ", "
+         << LevelWays->getValue()->getAsString() << ", "
+         << LevelLatency->getValue()->getAsString() << ')';
+    }
+    if (LevelList.empty()) {
+      // Arrays cannot be empty; emit a degenerate placeholder level.
+      OS << "\n  llvm::MCCacheLevelInfo(0, \"Empty\", 0, 0, 0, 0)";
+    }
+    OS << "\n}; // " << CH->getName() << "\n\n";
+  }
+}
+
+// Emits all write-combining buffer information.
+//
+void SubtargetEmitter::EmitWriteCombiningBuffers(raw_ostream &OS,
+                                                 unsigned &ID) {
+  std::vector<Record *> WCBuffers =
+    Records.getAllDerivedDefinitions("WriteCombiningBuffer");
+
+  for (const auto &WCBuffer : WCBuffers) {
+    // NB: named NumBuffers (was WCBuffers) to avoid shadowing the
+    // outer vector of records.
+    RecordVal *NumBuffers = WCBuffer->getValue("NumBuffers");
+
+    OS << "static const llvm::MCWriteCombiningBufferInfo " << WCBuffer->getName()
+       << "(";
+    OS << ID++ << ", \"" << WCBuffer->getName() << "\", ";
+    OS << NumBuffers->getValue()->getAsString() << ");\n\n";
+  }
+}
+
+// Emits all prefetcher information.
+//
+void SubtargetEmitter::EmitPrefetchConfigs(raw_ostream &OS, unsigned &ID) {
+  std::vector<Record *> Prefetchers =
+    Records.getAllDerivedDefinitions("SoftwarePrefetcher");
+
+  for (const auto &P : Prefetchers) {
+    // Read each record field under its own name; these two lookups
+    // were previously swapped, emitting the read-enable flag from the
+    // EnabledForWrites field and vice versa.
+    RecordVal *EnabledForReads = P->getValue("EnabledForReads");
+    RecordVal *EnabledForWrites = P->getValue("EnabledForWrites");
+    RecordVal *UseReadPFForWrites = P->getValue("UseReadPFForWrites");
+    RecordVal *ByteDistance = P->getValue("BytesAhead");
+    RecordVal *MaxByteDistance = P->getValue("MaxBytesAhead");
+    RecordVal *MinByteDistance = P->getValue("MinBytesAhead");
+    RecordVal *InstructionDistance = P->getValue("InstructionsAhead");
+    RecordVal *MaxIterationDistance = P->getValue("MaxIterationsAhead");
+    RecordVal *MinByteStride = P->getValue("MinStride");
+
+    OS << "static const llvm::MCSoftwarePrefetcherConfig " << P->getName()
+       << "("
+       << ID++ << ", \"" << P->getName() << "\", "
+       << EnabledForReads->getValue()->getAsString() << ", "
+       << EnabledForWrites->getValue()->getAsString() << ", "
+       << UseReadPFForWrites->getValue()->getAsString() << ", "
+       << ByteDistance->getValue()->getAsString() << ", "
+       << MinByteDistance->getValue()->getAsString() << ", "
+       << MaxByteDistance->getValue()->getAsString() << ", "
+       << InstructionDistance->getValue()->getAsString() << ", "
+       << MaxIterationDistance->getValue()->getAsString() << ", "
+       << MinByteStride->getValue()->getAsString() << ");\n\n";
+  }
+}
+
+// Emits one memory system definition.
+//
+void SubtargetEmitter::EmitMemoryModel(raw_ostream &OS, Record &M,
+                                       unsigned &ID) {
+  RecordVal *CacheHierarchyValue = M.getValue("Caches");
+  const DefInit &CacheHierarchyDef =
+    *cast<DefInit>(CacheHierarchyValue->getValue());
+  Record *CacheHierarchy = CacheHierarchyDef.getDef();
+
+  // Get cache level information.
+ RecordVal *CacheLevelValue = CacheHierarchy->getValue("Levels"); + const ListInit &CacheLevelsList = + *cast(CacheLevelValue->getValue()); + unsigned NumLevels = CacheLevelsList.size(); + + RecordVal *WCBufferValue = M.getValue("WCBuffers"); + const DefInit &WCBufferDef = + *cast(WCBufferValue->getValue()); + Record *WCBuffer = WCBufferDef.getDef(); + + RecordVal *PrefetcherValue = M.getValue("Prefetcher"); + const DefInit &PrefetcherDef = + *cast(PrefetcherValue->getValue()); + Record *Prefetcher = PrefetcherDef.getDef(); + + OS << "static const llvm::MCMemoryModel " << M.getName() << "(" + << ID++ << ", \"" << M.getName() << "\", " + << CacheHierarchy->getName() << ", " + << NumLevels << ", " + << WCBuffer->getName() << ", " + << Prefetcher->getName() << ");\n\n"; +} + +// Emits all memory system information. +// +void SubtargetEmitter::EmitMemoryModels(raw_ostream &OS, unsigned &ID) { + std::vector MemoryModels = + Records.getAllDerivedDefinitions("MemoryModel"); + + for (const auto &M : MemoryModels) { + EmitMemoryModel(OS, *M, ID); + } +} + +// Emits all contained execution slices if not already emitted, then +// emits this one. +// +void +SubtargetEmitter::EmitExecutionResource(raw_ostream &OS, + Record &Resource, + unsigned &ID, + SmallPtrSetImpl &Emitted) { + RecordVal *ContainedValue = Resource.getValue("Contained"); + const ListInit &ContainedList = + *cast(ContainedValue->getValue()); + unsigned NumContained = ContainedList.size(); + + // Emit all contained resources. + std::string ListName(Resource.getName()); + ListName += "Contained"; + EmitExecutionResourceList(OS, ListName, ContainedList, ID, Emitted); + + // Emit the memory model; + RecordVal *MemoryModelValue = Resource.getValue("MemModel"); + const DefInit &MemoryModelDef = + *cast(MemoryModelValue->getValue()); + Record *MemoryModel = MemoryModelDef.getDef(); + + // Now emit this resource. 
+ OS << "static const llvm::MCExecutionResource " << Resource.getName() + << "(" + << ID++ << ", \"" + << Resource.getName() << "\", " + << ListName << ", " + << NumContained << ", " + << MemoryModel->getName() << ");\n\n"; +} + +// Emits an execution resource descriptor, emitting all contained +// resources. +// +void +SubtargetEmitter::EmitExecutionResourceDesc(raw_ostream &OS, + Record &ResourceDesc, + unsigned &ID, + SmallPtrSetImpl &Emitted) { + EmitExecutionResourceDescResource(OS, ResourceDesc, ID, Emitted); + + RecordVal *ResourceValue = ResourceDesc.getValue("Resource"); + const DefInit &ResourceDef = + *cast(ResourceValue->getValue()); + Record *ResourceRecord = ResourceDef.getDef(); + + RecordVal *NumValue = ResourceDesc.getValue("NumResources"); + const IntInit &ResourceInt = + *cast(NumValue->getValue()); + + OS << "static const llvm::MCExecutionResourceDesc " << ResourceDesc.getName() + << "(" + << ID++ << ", \"" + << ResourceDesc.getName() << "\", " + << '&' << ResourceRecord->getName() << ", " + << ResourceInt.getValue() << ");\n\n"; +} + +// Emit the resource referenced by this execution resource +// descriptor. +// +void SubtargetEmitter:: +EmitExecutionResourceDescResource(raw_ostream &OS, + Record &ResourceDesc, + unsigned &ID, + SmallPtrSetImpl &Emitted) { + RecordVal *ResourceValue = ResourceDesc.getValue("Resource"); + const DefInit &ResourceDef = + *cast(ResourceValue->getValue()); + Record *ResourceRecord = ResourceDef.getDef(); + + if (Emitted.insert(ResourceRecord).second) + EmitExecutionResource(OS, *ResourceRecord, ID, Emitted); +} + +// Emit an execution resource list, creating the +// MCExecutionResourceDesc necessary to describe contained resources. +// +void SubtargetEmitter:: +EmitExecutionResourceList(raw_ostream &OS, + StringRef ListName, + const ListInit &ResourceList, + unsigned &ID, + SmallPtrSetImpl &Emitted) { + // First, emit all contained resources. 
We have to do this here + // because we don't want resource definitions to appear in the + // middle of the array below. + + for (auto ResourceDesc : ResourceList) { + const DefInit &ResourceDescDef = *cast(ResourceDesc); + Record *ResourceDescRecord = ResourceDescDef.getDef(); + + if (Emitted.insert(ResourceDescRecord).second) { + EmitExecutionResourceDesc(OS, *ResourceDescRecord, ID, Emitted); + } + } + + // Now emit the actual lists of resource descriptors. + std::function Delimeter = [&]() -> const char * { + Delimeter = []() -> const char * { + return ","; + }; + return ""; + }; + + OS << "static const llvm::MCExecutionResourceDesc *" << ListName << "[] = {"; + for (auto ResourceDesc : ResourceList) { + const DefInit &ResourceDescDef = *cast(ResourceDesc); + Record *ResourceDescRecord = ResourceDescDef.getDef(); + + OS << Delimeter() << "\n &" << ResourceDescRecord->getName(); + } + if (ResourceList.empty()) { + OS << "\n nullptr"; + } + OS << "\n};\n\n"; +} + +// Emit an execution engine. +// +void SubtargetEmitter::EmitSystemModel(raw_ostream &OS, Record &E, + unsigned &ID, + SmallPtrSetImpl &Emitted) { + RecordVal *ResourcesValue = E.getValue("Resources"); + const ListInit &ResourceList = + *cast(ResourcesValue->getValue()); + unsigned NumResources = ResourceList.size(); + + std::string ResourceListName; + ResourceListName += E.getName(); + ResourceListName += "Resources"; + + EmitExecutionResourceList(OS, ResourceListName, ResourceList, ID, Emitted); + + OS << "static const llvm::MCSystemModel " << E.getName() << "(" + << ID << ", \"" << E.getName() << "\", " + << ResourceListName << ", " + << NumResources << ");\n\n"; +} + +// Emits all memory model and execution resource information. 
+// +void SubtargetEmitter::EmitSystemModels(raw_ostream &OS) { + unsigned ID = 1; + + OS << "// ===============================================================\n" + << "// System models\n" + << "// ===============================================================\n" + << "//\n\n"; + + // Emit memory models. + OS << "// ===============================================================\n" + << "// Cache models\n" + << "//\n"; + + EmitCacheHierarchies(OS, ID); + + OS << "// ===============================================================\n" + << "// Write-combining buffers\n" + << "//\n"; + + EmitWriteCombiningBuffers(OS, ID); + + OS << "// ===============================================================\n" + << "// Software prefetch configs\n" + << "//\n"; + + EmitPrefetchConfigs(OS, ID); + + OS << "// ===============================================================\n" + << "// Memory models\n" + << "//\n"; + + EmitMemoryModels(OS, ID); + + // Emit execution engines. + OS << "// ===============================================================\n" + << "// System models\n" + << "//\n"; + + // Emit a resource list for this engine. + SmallPtrSet EmittedResources; + + std::vector Systems = + Records.getAllDerivedDefinitions("SystemModel"); + + for (const auto &S : Systems) { + EmitSystemModel(OS, *S, ID, EmittedResources); + } + + // Emit lookup tables. 
+ + // Gather and sort processor information + std::vector ProcessorList = + Records.getAllDerivedDefinitions("Processor"); + llvm::sort(ProcessorList, LessRecordFieldName()); + + // Begin processor->system model table + OS << "// Sorted (by key) array of execution engine model for CPU subtype.\n" + << "extern const llvm::SubtargetInfoKV " << Target + << "ProcSystemModelKV[] = {\n"; + // For each processor + for (Record *Processor : ProcessorList) { + StringRef Name = Processor->getValueAsString("Name"); + StringRef SystemName = Processor->getValueAsDef("System")->getName(); + + // Emit as { "cpu", execution engine }, + OS << " { \"" << Name << "\", (const void *)&" << SystemName << " },\n"; + } + // End processor->execution engine model table + OS << "};\n\n"; +} + // // ParseFeaturesFunction - Produces a subtarget specific function for parsing // the subtarget features string. @@ -1744,9 +2120,10 @@ << " const MCWriteProcResEntry *WPR,\n" << " const MCWriteLatencyEntry *WL,\n" << " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n" - << " const unsigned *OC, const unsigned *FP) :\n" + << " const unsigned *OC, const unsigned *FP,\n" + << " const SubtargetInfoKV *ProcSystem) :\n" << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n" - << " WPR, WL, RA, IS, OC, FP) { }\n\n" + << " WPR, WL, RA, IS, OC, FP, ProcSystem) { }\n\n" << " unsigned resolveVariantSchedClass(unsigned SchedClass,\n" << " const MCInst *MI, unsigned CPUID) const override {\n" << " return " << Target << "_MC" @@ -1807,6 +2184,8 @@ OS << "\n"; EmitSchedModel(OS); OS << "\n"; + EmitSystemModels(OS); + OS << "\n"; #if 0 OS << "} // end anonymous namespace\n\n"; #endif @@ -1835,9 +2214,10 @@ if (SchedModels.hasItineraries()) { OS << Target << "Stages, " << Target << "OperandCycles, " - << Target << "ForwardingPaths"; + << Target << "ForwardingPaths, "; } else - OS << "nullptr, nullptr, nullptr"; + OS << "nullptr, nullptr, nullptr, "; + OS << Target << "ProcSystemModelKV"; OS << ");\n}\n\n"; OS 
<< "} // end namespace llvm\n\n"; @@ -1909,6 +2289,9 @@ OS << "extern const unsigned " << Target << "ForwardingPaths[];\n"; } + OS << "extern const llvm::SubtargetInfoKV " << Target + << "ProcSystemModelKV[];\n"; + OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, " << "StringRef FS)\n" << " : TargetSubtargetInfo(TT, CPU, FS, "; @@ -1929,10 +2312,13 @@ if (SchedModels.hasItineraries()) { OS << Target << "Stages, " << Target << "OperandCycles, " - << Target << "ForwardingPaths"; + << Target << "ForwardingPaths, "; } else - OS << "nullptr, nullptr, nullptr"; - OS << ") {}\n\n"; + OS << "nullptr, nullptr, nullptr,"; + OS << '\n'; + OS.indent(24); + OS << Target << "ProcSystemModelKV" + << ") {}\n\n"; EmitSchedModelHelpers(ClassName, OS); EmitHwModeCheck(ClassName, OS);