diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1188,6 +1188,10 @@ "cpu_specific Multiversioning not implemented on this target"); } + // Get the cache line size of a given cpu. This method switches over + // the given cpu and returns `0` if the CPU is not found. + virtual Optional getCPUCacheLineSize() const { return None; } + // Returns maximal number of args passed in registers. unsigned getRegParmMax() const { assert(RegParmMax < 7 && "RegParmMax value is larger than AST can handle"); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -182,6 +182,8 @@ StringRef Name, llvm::SmallVectorImpl &Features) const override; + Optional getCPUCacheLineSize() const override; + bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1731,6 +1731,118 @@ } } +// Below is based on the following information: +// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | Processor Name | Cache Line Size (Bytes) | Source | +// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf | +// | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 bits * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) | +// | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html | +// | i686/Pentium | 32 | https://www.7-cpu.com/cpu/P6.html | +// | Netburst/Pentium4 | 64 | https://www.7-cpu.com/cpu/P4-180.html | +// | Atom | 64 | https://www.7-cpu.com/cpu/Atom.html | +// | Westmere | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture" | +// | Sandy Bridge | 64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html | +// | Ivy Bridge | 64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html | +// | Haswell | 64 | https://www.7-cpu.com/cpu/Haswell.html | +// | Bradwell | 64 | https://www.7-cpu.com/cpu/Broadwell.html | +// | Skylake (including skylake-avx512) | 64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy" | +// | Cascade Lake | 64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy" | +// | Skylake | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy" | +// | Ice Lake | 64 | https://www.7-cpu.com/cpu/Ice_Lake.html | +// | Knights Landing | 64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" | +// | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | +// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +Optional X86TargetInfo::getCPUCacheLineSize() const { + switch (CPU) { + // i486 + case CK_i486: + case CK_WinChipC6: + case CK_WinChip2: + case CK_C3: + // Lakemont + case CK_Lakemont: + return 16; + // i586 + case CK_i586: + case CK_Pentium: + case CK_PentiumMMX: + // i686 + case CK_PentiumPro: + case CK_i686: + case CK_Pentium2: + case CK_Pentium3: + case CK_PentiumM: + case CK_C3_2: + // K6 + case CK_K6: + case CK_K6_2: + case CK_K6_3: + // Geode + case CK_Geode: + return 32; + + // i386 + case CK_i386: + // Netburst + case CK_Pentium4: + case CK_Prescott: + case CK_Nocona: + // Atom + case CK_Bonnell: + case CK_Silvermont: + case CK_Goldmont: + case CK_GoldmontPlus: + case CK_Tremont: + + case CK_Westmere: + case CK_SandyBridge: + case CK_IvyBridge: + case CK_Haswell: + case CK_Broadwell: + case CK_SkylakeClient: + case CK_SkylakeServer: + case CK_Cascadelake: + case CK_Nehalem: + case CK_Cooperlake: + case CK_Cannonlake: + case CK_Tigerlake: + case CK_IcelakeClient: + case CK_IcelakeServer: + case CK_KNL: + case CK_KNM: + // K7 + case CK_Athlon: + case CK_AthlonXP: + // K8 + case CK_K8: + case CK_K8SSE3: + case CK_AMDFAM10: + // Bobcat + case CK_BTVER1: + case CK_BTVER2: + // Bulldozer + case CK_BDVER1: + case CK_BDVER2: + case CK_BDVER3: + case CK_BDVER4: + // Zen + case CK_ZNVER1: + case CK_ZNVER2: + // Deprecated + case CK_x86_64: + case CK_Yonah: + case CK_Penryn: + return 64; + + // The following currently have unknown cache line sizes (but they are probably all 64): + // Core + case CK_Core2: + case CK_Generic: + return None; + } +} + bool X86TargetInfo::validateOutputSize(const llvm::StringMap &FeatureMap, StringRef Constraint, unsigned Size) const { diff --git a/libcxx/include/memory b/libcxx/include/memory --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -3863,11 +3863,6 @@ return __r; } - template - static - shared_ptr<_Tp> - allocate_shared(const _Alloc& __a, _Args&& ...__args); - private: template ::value> struct __shared_ptr_default_allocator @@ -4180,26 +4175,6 @@ __r.release(); } -template -template -shared_ptr<_Tp> -shared_ptr<_Tp>::allocate_shared(const _Alloc& __a, _Args&& ...__args) -{ - static_assert( is_constructible<_Tp, _Args...>::value, "Can't construct object in allocate_shared" ); - typedef __shared_ptr_emplace<_Tp, _Alloc> _CntrlBlk; - typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; - typedef __allocator_destructor<_A2> _D2; - _A2 __a2(__a); - unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); - ::new(static_cast(_VSTD::addressof(*__hold2.get()))) - _CntrlBlk(__a, _VSTD::forward<_Args>(__args)...); - shared_ptr<_Tp> __r; - __r.__ptr_ = __hold2.get()->get(); - __r.__cntrl_ = _VSTD::addressof(*__hold2.release()); - __r.__enable_weak_this(__r.__ptr_, __r.__ptr_); - return __r; -} - template shared_ptr<_Tp>::~shared_ptr() { @@ -4412,7 +4387,22 @@ >::type allocate_shared(const _Alloc& __a, _Args&& ...__args) { - return shared_ptr<_Tp>::allocate_shared(__a, _VSTD::forward<_Args>(__args)...); + static_assert( is_constructible<_Tp, _Args...>::value, "Can't construct object in allocate_shared"); + + typedef __shared_ptr_emplace<_Tp, _Alloc> _CntrlBlk; + typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; + typedef __allocator_destructor<_A2> _D2; + + _A2 __a2(__a); + unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); + ::new(static_cast(_VSTD::addressof(*__hold2.get()))) + _CntrlBlk(__a, _VSTD::forward<_Args>(__args)...); + + shared_ptr<_Tp> __r; + __r.__ptr_ = __hold2.get()->get(); + __r.__cntrl_ = _VSTD::addressof(*__hold2.release()); + __r.__enable_weak_this(__r.__ptr_, __r.__ptr_); + return __r; } template