diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -202,6 +202,15 @@
   if (UncompressedSize == 0) // No compression
     Uncompressed = R.rest();
   else if (llvm::zlib::isAvailable()) {
+    // Don't allocate a massive buffer if UncompressedSize was corrupted.
+    // This is effective for sharded index, but not big monolithic ones, as
+    // once compressed size reaches 4MB nothing can be ruled out.
+    // Theoretical max ratio from https://zlib.net/zlib_tech.html
+    constexpr int MaxCompressionRatio = 1032;
+    if (UncompressedSize / MaxCompressionRatio > R.rest().size())
+      return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
+                   R.rest().size(), UncompressedSize);
+
     if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
                                                UncompressedSize))
       return std::move(E);
diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
--- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
@@ -7,12 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "Headers.h"
+#include "RIFF.h"
 #include "index/Index.h"
 #include "index/Serialization.h"
+#include "support/Logger.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#ifdef LLVM_ON_UNIX
+#include <sys/resource.h>
+#endif
 
 using ::testing::_;
 using ::testing::AllOf;
@@ -297,6 +303,77 @@
     EXPECT_NE(SerializedCmd.Output, Cmd.Output);
   }
 }
+
+// Lowers the soft address-space limit (RLIMIT_AS) for the lifetime of the
+// object and restores the original limit on destruction. If rlimit is
+// unsupported or cannot be set, this only logs and leaves limits untouched.
+#ifdef LLVM_ON_UNIX // rlimit is part of POSIX
+class ScopedMemoryLimit {
+  struct rlimit OriginalLimit;
+  bool Succeeded = false;
+
+public:
+  explicit ScopedMemoryLimit(rlim_t Bytes) {
+    if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
+      struct rlimit NewLimit = OriginalLimit;
+      NewLimit.rlim_cur = Bytes;
+      Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
+    }
+    if (!Succeeded)
+      log("Failed to set rlimit");
+  }
+
+  ~ScopedMemoryLimit() {
+    if (Succeeded)
+      setrlimit(RLIMIT_AS, &OriginalLimit);
+  }
+};
+#else
+class ScopedMemoryLimit {
+public:
+  explicit ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
+};
+#endif
+
+// Test that our deserialization detects invalid array sizes without allocating.
+// If this detection fails, the test should allocate a huge array and crash.
+TEST(SerializationTest, NoCrashOnBadStringTableSize) {
+  if (!llvm::zlib::isAvailable()) {
+    log("skipping test, no zlib");
+    return;
+  }
+
+  // First, create a valid serialized file.
+  auto In = readIndexFile(YAML);
+  ASSERT_FALSE(!In) << In.takeError();
+  IndexFileOut Out(*In);
+  Out.Format = IndexFileFormat::RIFF;
+  std::string Serialized = llvm::to_string(Out);
+
+  // Low-level parse it again, we're going to replace the `stri` chunk.
+  auto Parsed = riff::readFile(Serialized);
+  ASSERT_FALSE(!Parsed) << Parsed.takeError();
+  auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
+    return C.ID == riff::fourCC("stri");
+  });
+  ASSERT_NE(Stri, Parsed->Chunks.end());
+
+  // stri consists of a 4 byte uncompressed-size, and then compressed data.
+  // We'll claim our small amount of data expands to 4GB
+  std::string CorruptStri =
+      (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
+  Stri->Data = CorruptStri;
+
+  // Try to crash rather than hang on large allocation.
+  ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
+
+  std::string CorruptFile = llvm::to_string(*Parsed);
+  auto CorruptParsed = readIndexFile(CorruptFile);
+  ASSERT_TRUE(!CorruptParsed);
+  EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
+              testing::HasSubstr("bytes is implausible"));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang