diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h
--- a/compiler-rt/lib/scudo/standalone/size_class_map.h
+++ b/compiler-rt/lib/scudo/standalone/size_class_map.h
@@ -9,11 +9,33 @@
 #ifndef SCUDO_SIZE_CLASS_MAP_H_
 #define SCUDO_SIZE_CLASS_MAP_H_

+#include "chunk.h"
 #include "common.h"
 #include "string_utils.h"

 namespace scudo {

+inline uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) {
+  const uptr L = getMostSignificantSetBitIndex(Size);
+  const uptr LBits = (Size >> (L - LogBits)) - (1 << LogBits);
+  const uptr HBits = (L - ZeroLog) << LogBits;
+  return LBits + HBits;
+}
+
+template <typename Config> struct SizeClassMapBase {
+  static u32 getMaxCachedHint(uptr Size) {
+    DCHECK_LE(Size, MaxSize);
+    DCHECK_NE(Size, 0);
+    u32 N;
+    // Force a 32-bit division if the template parameters allow for it.
+    if (Config::MaxBytesCachedLog > 31 || Config::MaxSizeLog > 31)
+      N = static_cast<u32>((1UL << Config::MaxBytesCachedLog) / Size);
+    else
+      N = (1U << Config::MaxBytesCachedLog) / static_cast<u32>(Size);
+    return Max(1U, Min(Config::MaxNumCachedHint, N));
+  }
+};
+
 // SizeClassMap maps allocation sizes into size classes and back, in an
 // efficient table-free manner.
 //
@@ -33,22 +55,24 @@
 // of chunks that can be cached per-thread:
 // - MaxNumCachedHint is a hint for the max number of chunks cached per class.
 // - 2^MaxBytesCachedLog is the max number of bytes cached per class.
+template <typename Config>
+class FixedSizeClassMap : public SizeClassMapBase<Config> {
+  typedef SizeClassMapBase<Config> Base;

-template <u8 NumBits, uptr MinSizeLog, uptr MidSizeLog, uptr MaxSizeLog,
-          u32 MaxNumCachedHintT, uptr MaxBytesCachedLog>
-class SizeClassMap {
-  static const uptr MinSize = 1UL << MinSizeLog;
-  static const uptr MidSize = 1UL << MidSizeLog;
+  static const uptr MinSize = 1UL << Config::MinSizeLog;
+  static const uptr MidSize = 1UL << Config::MidSizeLog;
   static const uptr MidClass = MidSize / MinSize;
-  static const u8 S = NumBits - 1;
+  static const u8 S = Config::NumBits - 1;
   static const uptr M = (1UL << S) - 1;

+  static const uptr SizeDelta = Chunk::getHeaderSize();
+
 public:
-  static const u32 MaxNumCachedHint = MaxNumCachedHintT;
+  static const u32 MaxNumCachedHint = Config::MaxNumCachedHint;

-  static const uptr MaxSize = 1UL << MaxSizeLog;
+  static const uptr MaxSize = (1UL << Config::MaxSizeLog) + SizeDelta;
   static const uptr NumClasses =
-      MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1;
+      MidClass + ((Config::MaxSizeLog - Config::MidSizeLog) << S) + 1;
   static_assert(NumClasses <= 256, "");
   static const uptr LargestClassId = NumClasses - 1;
   static const uptr BatchClassId = 0;
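For concreteness: with DefaultSizeClassConfig (defined later in this file), the constants above evaluate as shown in this standalone sketch. It is not part of the patch, and it assumes SizeDelta is the 16-byte chunk header used on 64-bit targets.

// Standalone sketch: FixedSizeClassMap constants for DefaultSizeClassConfig,
// assuming SizeDelta == 16 (Chunk::getHeaderSize() on 64-bit targets).
#include <cstdint>
using uptr = uintptr_t;
constexpr uptr MinSizeLog = 5, MidSizeLog = 8, MaxSizeLog = 17, NumBits = 3;
constexpr uptr SizeDelta = 16;
constexpr uptr MinSize = 1UL << MinSizeLog;               // 32
constexpr uptr MidSize = 1UL << MidSizeLog;               // 256
constexpr uptr MidClass = MidSize / MinSize;              // 8
constexpr uptr S = NumBits - 1;                           // 2
constexpr uptr MaxSize = (1UL << MaxSizeLog) + SizeDelta; // 131088
constexpr uptr NumClasses = MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1;
static_assert(NumClasses == 45,
              "8 linear classes + 9 doublings * 4 subclasses + batch class");
int main() {}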
@@ -56,97 +80,206 @@
   static uptr getSizeByClassId(uptr ClassId) {
     DCHECK_NE(ClassId, BatchClassId);
     if (ClassId <= MidClass)
-      return ClassId << MinSizeLog;
+      return (ClassId << Config::MinSizeLog) + SizeDelta;
     ClassId -= MidClass;
     const uptr T = MidSize << (ClassId >> S);
-    return T + (T >> S) * (ClassId & M);
+    return T + (T >> S) * (ClassId & M) + SizeDelta;
   }

   static uptr getClassIdBySize(uptr Size) {
+    if (Size <= SizeDelta + (1 << Config::MinSizeLog))
+      return 1;
+    Size -= SizeDelta;
     DCHECK_LE(Size, MaxSize);
     if (Size <= MidSize)
-      return (Size + MinSize - 1) >> MinSizeLog;
-    Size -= 1;
-    const uptr L = getMostSignificantSetBitIndex(Size);
-    const uptr LBits = (Size >> (L - S)) - (1 << S);
-    const uptr HBits = (L - MidSizeLog) << S;
-    return MidClass + 1 + HBits + LBits;
+      return (Size + MinSize - 1) >> Config::MinSizeLog;
+    return MidClass + 1 + scaledLog2(Size - 1, Config::MidSizeLog, S);
   }
+};

-  static u32 getMaxCachedHint(uptr Size) {
-    DCHECK_LE(Size, MaxSize);
-    DCHECK_NE(Size, 0);
-    u32 N;
-    // Force a 32-bit division if the template parameters allow for it.
-    if (MaxBytesCachedLog > 31 || MaxSizeLog > 31)
-      N = static_cast<u32>((1UL << MaxBytesCachedLog) / Size);
-    else
-      N = (1U << MaxBytesCachedLog) / static_cast<u32>(Size);
-    return Max(1U, Min(MaxNumCachedHint, N));
-  }
+template <typename Config>
+class TableSizeClassMap : public SizeClassMapBase<Config> {
+  static const u8 S = Config::NumBits - 1;
+  static const uptr M = (1UL << S) - 1;
+  static const uptr ClassesSize =
+      sizeof(Config::Classes) / sizeof(Config::Classes[0]);

-  static void print() {
-    ScopedString Buffer(1024);
-    uptr PrevS = 0;
-    uptr TotalCached = 0;
-    for (uptr I = 0; I < NumClasses; I++) {
-      if (I == BatchClassId)
-        continue;
-      const uptr S = getSizeByClassId(I);
-      if (S >= MidSize / 2 && (S & (S - 1)) == 0)
-        Buffer.append("\n");
-      const uptr D = S - PrevS;
-      const uptr P = PrevS ? (D * 100 / PrevS) : 0;
-      const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
-      const uptr Cached = getMaxCachedHint(S) * S;
-      Buffer.append(
-          "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
-          I, getSizeByClassId(I), D, P, L, getMaxCachedHint(S), Cached,
-          getClassIdBySize(S));
-      TotalCached += Cached;
-      PrevS = S;
+  struct SizeTable {
+    constexpr SizeTable() {
+      uptr Pos = 1 << Config::MidSizeLog;
+      uptr Inc = 1 << (Config::MidSizeLog - S);
+      for (uptr i = 0; i != getTableSize(); ++i) {
+        Pos += Inc;
+        if ((Pos & (Pos - 1)) == 0)
+          Inc *= 2;
+        Tab[i] = computeClassId(Pos + Config::SizeDelta);
+      }
     }
-    Buffer.append("Total Cached: %zu\n", TotalCached);
-    Buffer.output();
-  }

-  static void validate() {
-    for (uptr C = 0; C < NumClasses; C++) {
-      if (C == BatchClassId)
-        continue;
-      const uptr S = getSizeByClassId(C);
-      CHECK_NE(S, 0U);
-      CHECK_EQ(getClassIdBySize(S), C);
-      if (C < LargestClassId)
-        CHECK_EQ(getClassIdBySize(S + 1), C + 1);
-      CHECK_EQ(getClassIdBySize(S - 1), C);
-      if (C - 1 != BatchClassId)
-        CHECK_GT(getSizeByClassId(C), getSizeByClassId(C - 1));
+    constexpr static u8 computeClassId(uptr Size) {
+      for (uptr i = 0; i != ClassesSize; ++i) {
+        if (Size <= Config::Classes[i])
+          return i + 1;
+      }
+      return -1;
     }
-    // Do not perform the loop if the maximum size is too large.
-    if (MaxSizeLog > 19)
-      return;
-    for (uptr S = 1; S <= MaxSize; S++) {
-      const uptr C = getClassIdBySize(S);
-      CHECK_LT(C, NumClasses);
-      CHECK_GE(getSizeByClassId(C), S);
-      if (C - 1 != BatchClassId)
-        CHECK_LT(getSizeByClassId(C - 1), S);
+
+    constexpr static uptr getTableSize() {
+      return (Config::MaxSizeLog - Config::MidSizeLog) << S;
     }
+
+    u8 Tab[getTableSize()] = {};
+  };
+
+  static constexpr SizeTable Table = {};
+
+public:
+  static const u32 MaxNumCachedHint = Config::MaxNumCachedHint;
+
+  static const uptr NumClasses = ClassesSize + 1;
+  static_assert(NumClasses < 256, "");
+  static const uptr LargestClassId = NumClasses - 1;
+  static const uptr BatchClassId = 0;
+  static const uptr MaxSize = Config::Classes[LargestClassId - 1];
+
+  static uptr getSizeByClassId(uptr ClassId) {
+    return Config::Classes[ClassId - 1];
   }
+
+  static uptr getClassIdBySize(uptr Size) {
+    if (Size <= Config::Classes[0])
+      return 1;
+    Size -= Config::SizeDelta;
+    DCHECK_LE(Size, MaxSize);
+    if (Size <= (1 << Config::MidSizeLog))
+      return ((Size - 1) >> Config::MinSizeLog) + 1;
+    return Table.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)];
+  }
+
+  static void print() {}
+  static void validate() {}
+};
+
+struct AndroidSizeClassConfig {
+#if SCUDO_WORDSIZE == 64U
+  // Measured using a system_server profile.
+  static const uptr NumBits = 7;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 6;
+  static const uptr MaxSizeLog = 16;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
+
+  static constexpr u32 Classes[] = {
+      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000a0,
+      0x000b0, 0x000e0, 0x00110, 0x00130, 0x001a0, 0x00240, 0x00320, 0x00430,
+      0x00640, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01150, 0x01ad0, 0x02190,
+      0x03610, 0x04010, 0x04510, 0x04d10, 0x05a10, 0x07310, 0x09610, 0x10010,
+  };
+  static const uptr SizeDelta = 16;
+#else
+  // Measured using a dex2oat profile.
+  static const uptr NumBits = 8;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 16;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
+
+  static constexpr u32 Classes[] = {
+      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090,
+      0x000a0, 0x000b0, 0x000c0, 0x000d0, 0x000e0, 0x000f0, 0x00100, 0x00110,
+      0x00120, 0x00140, 0x00150, 0x00170, 0x00190, 0x001c0, 0x001f0, 0x00220,
+      0x00240, 0x00260, 0x002a0, 0x002e0, 0x00310, 0x00340, 0x00380, 0x003b0,
+      0x003e0, 0x00430, 0x00490, 0x00500, 0x00570, 0x005f0, 0x00680, 0x00720,
+      0x007d0, 0x00890, 0x00970, 0x00a50, 0x00b80, 0x00cb0, 0x00e30, 0x00fb0,
+      0x011b0, 0x01310, 0x01470, 0x01790, 0x01b50, 0x01fd0, 0x02310, 0x02690,
+      0x02b10, 0x02fd0, 0x03610, 0x03e10, 0x04890, 0x05710, 0x06a90, 0x10010,
+  };
+  static const uptr SizeDelta = 16;
+#endif
+};
+
+typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap;
+
+struct DefaultSizeClassConfig {
+  static const uptr NumBits = 3;
+  static const uptr MinSizeLog = 5;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 17;
+  static const u32 MaxNumCachedHint = 8;
+  static const uptr MaxBytesCachedLog = 10;
 };

-typedef SizeClassMap<3, 5, 8, 17, 8, 10> DefaultSizeClassMap;
+typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap;

-// TODO(kostyak): further tune class maps for Android & Fuchsia.
+struct SvelteSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
-typedef SizeClassMap<4, 4, 8, 14, 4, 10> SvelteSizeClassMap;
-typedef SizeClassMap<2, 5, 9, 16, 14, 14> AndroidSizeClassMap;
+  static const uptr NumBits = 4;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 14;
+  static const u32 MaxNumCachedHint = 4;
+  static const uptr MaxBytesCachedLog = 10;
 #else
-typedef SizeClassMap<4, 3, 7, 14, 5, 10> SvelteSizeClassMap;
-typedef SizeClassMap<3, 4, 9, 16, 14, 14> AndroidSizeClassMap;
+  static const uptr NumBits = 4;
+  static const uptr MinSizeLog = 3;
+  static const uptr MidSizeLog = 7;
+  static const uptr MaxSizeLog = 14;
+  static const u32 MaxNumCachedHint = 5;
+  static const uptr MaxBytesCachedLog = 10;
 #endif
+};
+
+typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap;
+
+template <typename SCMap> inline void printMap() {
+  ScopedString Buffer(1024);
+  uptr PrevS = 0;
+  uptr TotalCached = 0;
+  for (uptr I = 0; I < SCMap::NumClasses; I++) {
+    if (I == SCMap::BatchClassId)
+      continue;
+    const uptr S = SCMap::getSizeByClassId(I);
+    const uptr D = S - PrevS;
+    const uptr P = PrevS ? (D * 100 / PrevS) : 0;
+    const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
+    const uptr Cached = SCMap::getMaxCachedHint(S) * S;
+    Buffer.append(
+        "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
+        I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
+        SCMap::getClassIdBySize(S));
+    TotalCached += Cached;
+    PrevS = S;
+  }
+  Buffer.append("Total Cached: %zu\n", TotalCached);
+  Buffer.output();
+}
+
+template <typename SCMap> static void validateMap() {
+  for (uptr C = 0; C < SCMap::NumClasses; C++) {
+    if (C == SCMap::BatchClassId)
+      continue;
+    const uptr S = SCMap::getSizeByClassId(C);
+    CHECK_NE(S, 0U);
+    CHECK_EQ(SCMap::getClassIdBySize(S), C);
+    if (C < SCMap::LargestClassId)
+      CHECK_EQ(SCMap::getClassIdBySize(S + 1), C + 1);
+    CHECK_EQ(SCMap::getClassIdBySize(S - 1), C);
+    if (C - 1 != SCMap::BatchClassId)
+      CHECK_GT(SCMap::getSizeByClassId(C), SCMap::getSizeByClassId(C - 1));
+  }
+  // Do not perform the loop if the maximum size is too large.
+  if (SCMap::MaxSize > (1 << 19))
+    return;
+  for (uptr S = 1; S <= SCMap::MaxSize; S++) {
+    const uptr C = SCMap::getClassIdBySize(S);
+    CHECK_LT(C, SCMap::NumClasses);
+    CHECK_GE(SCMap::getSizeByClassId(C), S);
+    if (C - 1 != SCMap::BatchClassId)
+      CHECK_LT(SCMap::getSizeByClassId(C - 1), S);
+  }
+}

 } // namespace scudo

 #endif // SCUDO_SIZE_CLASS_MAP_H_
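Before moving on to the test changes, here is a minimal standalone sketch (not part of the patch) of the mapping that FixedSizeClassMap and scaledLog2 implement, using the Default parameters and leaving SizeDelta and the batch class out for clarity: sizes up to MidSize map linearly in MinSize steps, and above that each power-of-two range is split into 2^(NumBits - 1) classes.

// Standalone sketch of the FixedSizeClassMap arithmetic for NumBits = 3,
// MinSizeLog = 5, MidSizeLog = 8, MaxSizeLog = 17, with SizeDelta omitted.
#include <cassert>
#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;
const uptr MinSizeLog = 5, MidSizeLog = 8, MaxSizeLog = 17, NumBits = 3;
const uptr MinSize = 1UL << MinSizeLog, MidSize = 1UL << MidSizeLog;
const uptr MidClass = MidSize / MinSize, S = NumBits - 1, M = (1UL << S) - 1;

uptr mostSignificantSetBitIndex(uptr X) {
  return 63U - static_cast<uptr>(__builtin_clzll(X));
}

// Same arithmetic as scaledLog2() in the patch.
uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) {
  const uptr L = mostSignificantSetBitIndex(Size);
  const uptr LBits = (Size >> (L - LogBits)) - (1UL << LogBits);
  const uptr HBits = (L - ZeroLog) << LogBits;
  return LBits + HBits;
}

uptr classIdBySize(uptr Size) {
  if (Size <= MidSize)
    return (Size + MinSize - 1) >> MinSizeLog; // Linear part: 32-byte steps.
  return MidClass + 1 + scaledLog2(Size - 1, MidSizeLog, S);
}

uptr sizeByClassId(uptr ClassId) {
  if (ClassId <= MidClass)
    return ClassId << MinSizeLog;
  ClassId -= MidClass;
  const uptr T = MidSize << (ClassId >> S); // Power-of-two base size.
  return T + (T >> S) * (ClassId & M);      // Plus 0..3 quarters of it.
}

int main() {
  // Every size maps to a class that covers it, and the class size maps back
  // to the same class id (the round trip validateMap() checks).
  for (uptr Size = 1; Size <= (1UL << MaxSizeLog); Size++) {
    const uptr C = classIdBySize(Size);
    assert(sizeByClassId(C) >= Size);
    assert(classIdBySize(sizeByClassId(C)) == C);
  }
  // Classes 8 through 13 come out as 256 320 384 448 512 640 bytes.
  for (uptr C = MidClass; C <= MidClass + 5; C++)
    printf("class %2zu -> %zu bytes\n", (size_t)C, (size_t)sizeByClassId(C));
  return 0;
}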
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -157,15 +157,16 @@

   // Check that reallocating a chunk to a slightly smaller or larger size
   // returns the same chunk. This requires that all the sizes we iterate on use
-  // the same block size, but that should be the case for 2048 with our default
-  // class size maps.
-  P = Allocator->allocate(DataSize, Origin);
-  memset(P, Marker, DataSize);
+  // the same block size, but that should be the case for MaxSize - 64 with our
+  // default class size maps.
+  constexpr scudo::uptr ReallocSize = MaxSize - 64;
+  P = Allocator->allocate(ReallocSize, Origin);
+  memset(P, Marker, ReallocSize);
   for (scudo::sptr Delta = -32; Delta < 32; Delta += 8) {
-    const scudo::uptr NewSize = DataSize + Delta;
+    const scudo::uptr NewSize = ReallocSize + Delta;
     void *NewP = Allocator->reallocate(P, NewSize);
     EXPECT_EQ(NewP, P);
-    for (scudo::uptr I = 0; I < DataSize - 32; I++)
+    for (scudo::uptr I = 0; I < ReallocSize - 32; I++)
       EXPECT_EQ((reinterpret_cast<char *>(NewP))[I], Marker);
     checkMemoryTaggingMaybe(Allocator.get(), NewP, NewSize, 0);
   }
@@ -343,9 +344,19 @@
 #endif
 }

+struct DeathSizeClassConfig {
+  static const scudo::uptr NumBits = 1;
+  static const scudo::uptr MinSizeLog = 10;
+  static const scudo::uptr MidSizeLog = 10;
+  static const scudo::uptr MaxSizeLog = 10;
+  static const scudo::u32 MaxNumCachedHint = 1;
+  static const scudo::uptr MaxBytesCachedLog = 10;
+};
+
 struct DeathConfig {
   // Tiny allocator, its Primary only serves chunks of 1024 bytes.
-  using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>;
+  using DeathSizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>;
   typedef scudo::SizeClassAllocator64<DeathSizeClassMap, 20U> Primary;
   typedef scudo::MapAllocator<0U> Secondary;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>;
diff --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
@@ -12,8 +12,8 @@

 template <class SizeClassMap> void testSizeClassMap() {
   typedef SizeClassMap SCMap;
-  SCMap::print();
-  SCMap::validate();
+  scudo::printMap<SCMap>();
+  scudo::validateMap<SCMap>();
 }

 TEST(ScudoSizeClassMapTest, DefaultSizeClassMap) {
@@ -28,12 +28,31 @@
   testSizeClassMap<scudo::AndroidSizeClassMap>();
 }

+struct OneClassSizeClassConfig {
+  static const scudo::uptr NumBits = 1;
+  static const scudo::uptr MinSizeLog = 5;
+  static const scudo::uptr MidSizeLog = 5;
+  static const scudo::uptr MaxSizeLog = 5;
+  static const scudo::u32 MaxNumCachedHint = 0;
+  static const scudo::uptr MaxBytesCachedLog = 0;
+};
+
 TEST(ScudoSizeClassMapTest, OneClassSizeClassMap) {
-  testSizeClassMap<scudo::SizeClassMap<1U, 5U, 5U, 5U, 0U, 0U>>();
+  testSizeClassMap<scudo::FixedSizeClassMap<OneClassSizeClassConfig>>();
 }

 #if SCUDO_CAN_USE_PRIMARY64
+struct LargeMaxSizeClassConfig {
+  static const scudo::uptr NumBits = 3;
+  static const scudo::uptr MinSizeLog = 4;
+  static const scudo::uptr MidSizeLog = 8;
+  static const scudo::uptr MaxSizeLog = 63;
+  static const scudo::u32 MaxNumCachedHint = 128;
+  static const scudo::uptr MaxBytesCachedLog = 16;
+};
+
 TEST(ScudoSizeClassMapTest, LargeMaxSizeClassMap) {
-  testSizeClassMap<scudo::SizeClassMap<3U, 4U, 8U, 63U, 128U, 16U>>();
+  testSizeClassMap<scudo::FixedSizeClassMap<LargeMaxSizeClassConfig>>();
 }
 #endif
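The tests above now route both the fixed and the table-driven maps through scudo::printMap()/validateMap(). Restated outside the allocator, the invariants validateMap() enforces on a table of class sizes amount to the checks in this standalone sketch (the table here is made up for illustration, not a measured configuration):

// Standalone sketch of the validateMap() invariants for a table of classes.
#include <cassert>
#include <cstddef>

const size_t Classes[] = {32, 48, 64, 96, 128, 192, 256, 512, 1024};
const size_t NumClasses = sizeof(Classes) / sizeof(Classes[0]);

// Smallest class that fits Size, by linear scan. TableSizeClassMap precomputes
// the equivalent answer into SizeTable::Tab at compile time.
size_t classIdBySize(size_t Size) {
  for (size_t I = 0; I != NumClasses; ++I)
    if (Size <= Classes[I])
      return I;
  return NumClasses; // Too large for the primary; not expected below.
}

int main() {
  // Class sizes must be strictly increasing.
  for (size_t I = 1; I != NumClasses; ++I)
    assert(Classes[I] > Classes[I - 1]);
  // Every size up to the largest class maps to the smallest class that holds
  // it, and that class's size covers the requested size.
  for (size_t Size = 1; Size <= Classes[NumClasses - 1]; ++Size) {
    const size_t C = classIdBySize(Size);
    assert(C < NumClasses);
    assert(Classes[C] >= Size);
    if (C > 0)
      assert(Classes[C - 1] < Size);
  }
  return 0;
}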
diff --git a/compiler-rt/lib/scudo/standalone/tools/compute_size_class_config.cpp b/compiler-rt/lib/scudo/standalone/tools/compute_size_class_config.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/tools/compute_size_class_config.cpp
@@ -0,0 +1,161 @@
+//===-- compute_size_class_config.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+
+struct Alloc {
+  size_t size, count;
+};
+
+size_t measureWastage(const std::vector<Alloc> &allocs,
+                      const std::vector<size_t> &classes, size_t pageSize,
+                      size_t headerSize) {
+  size_t totalWastage = 0;
+  for (auto &a : allocs) {
+    size_t sizePlusHeader = a.size + headerSize;
+    size_t wastage = -1ull;
+    for (auto c : classes)
+      if (c >= sizePlusHeader && c - sizePlusHeader < wastage)
+        wastage = c - sizePlusHeader;
+    if (wastage == -1ull)
+      continue;
+    if (wastage > 2 * pageSize)
+      wastage = 2 * pageSize;
+    totalWastage += wastage * a.count;
+  }
+  return totalWastage;
+}
+
+void readAllocs(std::vector<Alloc> &allocs, const char *path) {
+  FILE *f = fopen(path, "r");
+  if (!f) {
+    fprintf(stderr, "compute_size_class_config: could not open %s: %s\n", path,
+            strerror(errno));
+    exit(1);
+  }
+
+  const char header[] = "<malloc version=\"scudo-1\">\n";
+  char buf[sizeof(header) - 1];
+  if (fread(buf, 1, sizeof(header) - 1, f) != sizeof(header) - 1 ||
+      memcmp(buf, header, sizeof(header) - 1) != 0) {
+    fprintf(stderr, "compute_size_class_config: invalid input format\n");
+    exit(1);
+  }
+
+  Alloc a;
+  while (fscanf(f, "<alloc size=\"%zu\" count=\"%zu\"/>\n", &a.size,
+                &a.count) == 2)
+    allocs.push_back(a);
+  fclose(f);
+}
+
+size_t log2Floor(size_t x) { return sizeof(long) * 8 - 1 - __builtin_clzl(x); }
+
+void usage() {
+  fprintf(stderr,
+          "usage: compute_size_class_config [-p pageSize] [-c largestClass] "
+          "[-h headerSize] [-n numClasses] [-b numBits] profile...\n");
+  exit(1);
+}
+
+int main(int argc, char **argv) {
+  size_t pageSize = 4096;
+  size_t largestClass = 65552;
+  size_t headerSize = 16;
+  size_t numClasses = 32;
+  size_t numBits = 5;
+
+  std::vector<Alloc> allocs;
+  for (size_t i = 1; i != argc;) {
+    auto matchArg = [&](size_t &arg, const char *name) {
+      if (strcmp(argv[i], name) == 0) {
+        if (i + 1 != argc) {
+          arg = atoi(argv[i + 1]);
+          i += 2;
+        } else {
+          usage();
+        }
+        return true;
+      }
+      return false;
+    };
+    if (matchArg(pageSize, "-p") || matchArg(largestClass, "-c") ||
+        matchArg(headerSize, "-h") || matchArg(numClasses, "-n") ||
+        matchArg(numBits, "-b"))
+      continue;
+    readAllocs(allocs, argv[i]);
+    ++i;
+  }
+
+  if (allocs.empty())
+    usage();
+
+  std::vector<size_t> classes;
+  classes.push_back(largestClass);
+  for (size_t i = 1; i != numClasses; ++i) {
+    size_t minWastage = -1ull;
+    size_t minWastageClass;
+    for (size_t newClass = 16; newClass != largestClass; newClass += 16) {
+      // Skip classes with more than numBits bits, ignoring leading or trailing
+      // zero bits.
+      if (__builtin_ctzl(newClass - headerSize) +
+              __builtin_clzl(newClass - headerSize) <
+          sizeof(long) * 8 - numBits)
+        continue;
+
+      classes.push_back(newClass);
+      size_t newWastage =
+          measureWastage(allocs, classes, pageSize, headerSize);
+      classes.pop_back();
+      if (newWastage < minWastage) {
+        minWastage = newWastage;
+        minWastageClass = newClass;
+      }
+    }
+    classes.push_back(minWastageClass);
+  }
+
+  std::sort(classes.begin(), classes.end());
+  size_t minSizeLog = log2Floor(headerSize);
+  size_t midSizeIndex = 0;
+  while (classes[midSizeIndex + 1] - classes[midSizeIndex] == (1 << minSizeLog))
+    midSizeIndex++;
+  size_t midSizeLog = log2Floor(classes[midSizeIndex] - headerSize);
+  size_t maxSizeLog = log2Floor(classes.back() - headerSize - 1) + 1;
+
+  printf(R"(// wastage = %zu
+
+struct MySizeClassConfig {
+  static const uptr NumBits = %zu;
+  static const uptr MinSizeLog = %zu;
+  static const uptr MidSizeLog = %zu;
+  static const uptr MaxSizeLog = %zu;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
+
+  static constexpr u32 Classes[] = {)",
+         measureWastage(allocs, classes, pageSize, headerSize), numBits,
+         minSizeLog, midSizeLog, maxSizeLog);
+  for (size_t i = 0; i != classes.size(); ++i) {
+    if ((i % 8) == 0)
+      printf("\n      ");
+    else
+      printf(" ");
+    printf("0x%05zx,", classes[i]);
+  }
+  printf(R"(
+  };
+  static const uptr SizeDelta = %zu;
+};
+)", headerSize);
+}
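As far as can be inferred from the tool and the configs above, the intended workflow is: capture one or more allocation profiles in the malloc_info-style format readAllocs() expects, run compute_size_class_config over them (the -p, -c, -h, -n and -b flags override the page size, largest class, header size, number of classes and per-class significant-bit budget), and paste the emitted MySizeClassConfig into size_class_map.h next to AndroidSizeClassConfig. Wiring the generated struct into the allocator is then one typedef, mirroring the AndroidSizeClassMap typedef added in this patch (MySizeClassMap is a hypothetical name, not something the patch defines):

// Inside namespace scudo in size_class_map.h, after pasting the config struct
// emitted by the tool:
typedef TableSizeClassMap<MySizeClassConfig> MySizeClassMap;

The -b budget keeps every chosen class size (minus the header) to at most numBits significant bits, which is what allows TableSizeClassMap to index its precomputed SizeTable with scaledLog2 instead of searching the class array at run time.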