diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -472,6 +472,26 @@ if (!llvm::zlib::isAvailable()) return make_error(instrprof_error::zlib_unavailable); + // Sanity-check the uncompressed size. If the data is corrupted, then we + // could run into a situation where we attempt to malloc a large amount of + // data and OOM (see PR51628). While we technically do not have a way to + // error-check if the size is actually "correct", we can at least guess + // beforehand if the data is corrupt by comparing the uncompressed size to + // the compressed size. If the ratio is large enough, then we can warn. + // + // https://zlib.net/zlib_tech.html states that typical zlib compression + // factors are in the range of 2:1 to 5:1. The value used here should be + // large enough such that we minimize the number of "false positives" from + // decompressions that we can properly allocate for. + constexpr size_t CompressionRatio = 10; + if (UncompressedSize > CompressedSize * CompressionRatio) { + llvm::errs() + << "warning: the uncompressed size is more than " + << CompressionRatio + << "x the size of the compressed size, which could indicate data " + "corruption in the uncompressed size and lead to an OOM error\n"; + } + StringRef CompressedNameStrings(reinterpret_cast(P), CompressedSize); if (Error E =