Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -76,6 +76,7 @@
 add_subdirectory(tool)
 add_subdirectory(indexer)
 add_subdirectory(index/dex/dexp)
+add_subdirectory(index/dex/fuzzer)
 
 if (LLVM_INCLUDE_BENCHMARKS)
   add_subdirectory(benchmarks)
Index: clang-tools-extra/clangd/index/dex/Dex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.cpp
+++ clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -128,8 +128,8 @@
 
   // Convert lists of items to posting lists.
   for (const auto &TokenToPostingList : TempInvertedIndex)
-    InvertedIndex.insert({TokenToPostingList.first,
-                          PostingList(move(TokenToPostingList.second))});
+    InvertedIndex.insert(
+        {TokenToPostingList.first, PostingList(TokenToPostingList.second)});
 
   vlog("Built Dex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
Index: clang-tools-extra/clangd/index/dex/PostingList.h
===================================================================
--- clang-tools-extra/clangd/index/dex/PostingList.h
+++ clang-tools-extra/clangd/index/dex/PostingList.h
@@ -6,13 +6,19 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This defines posting list interface: a storage for identifiers of symbols
-// which can be characterized by a specific feature (such as fuzzy-find trigram,
-// scope, type or any other Search Token). Posting lists can be traversed in
-// order using an iterator and are values for inverted index, which maps search
-// tokens to corresponding posting lists.
-//
+///
+/// \file
+/// This defines posting list interface: a storage for identifiers of symbols
+/// which can be characterized by a specific feature (such as fuzzy-find
+/// trigram, scope, type or any other Search Token). Posting lists can be
+/// traversed in order using an iterator and are values for inverted index,
+/// which maps search tokens to corresponding posting lists.
+///
+/// In order to decrease size of Index in-memory representation, Variable Byte
+/// Encoding (VByte) is used for PostingLists compression. An overview of VByte
+/// algorithm can be found in "Introduction to Information Retrieval" book:
+/// https://nlp.stanford.edu/IR-book/html/htmledition/variable-byte-codes-1.html
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
@@ -20,6 +26,7 @@
 
 #include "Iterator.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include <cstdint>
 #include <vector>
 
@@ -29,20 +36,41 @@
 
 class Iterator;
 
+/// Chunk is a fixed-width piece of PostingList which contains the first DocID
+/// in uncompressed format (Head) and delta-encoded Payload. It can be
+/// decompressed upon request.
+struct Chunk {
+  /// Keep sizeof(Chunk) == 32.
+  static constexpr size_t PayloadSize = 32 - sizeof(DocID);
+  /// Number of DocID bits in each encoding byte.
+  static constexpr size_t BitsPerEncodingByte = 7;
+
+  llvm::SmallVector<DocID, PayloadSize + 1> decompress() const;
+
+  /// The first element of decompressed Chunk.
+  DocID Head;
+  /// VByte-encoded deltas.
+  std::array<uint8_t, PayloadSize> Payload = std::array<uint8_t, PayloadSize>();
+};
+static_assert(sizeof(Chunk) == 32, "Chunk should take 32 bytes of memory.");
+
 /// PostingList is the storage of DocIDs which can be inserted to the Query
-/// Tree as a leaf by constructing Iterator over the PostingList object.
-// FIXME(kbobyrev): Use VByte algorithm to compress underlying data.
+/// Tree as a leaf by constructing Iterator over the PostingList object. DocIDs
+/// are stored in underlying chunks. Compression saves memory at a small cost
+/// in access time, which is still fast enough in practice.
 class PostingList {
 public:
-  explicit PostingList(const std::vector<DocID> &&Documents)
-      : Documents(std::move(Documents)) {}
+  explicit PostingList(llvm::ArrayRef<DocID> Documents);
 
+  /// Constructs DocumentIterator over given posting list. DocumentIterator will
+  /// go through the chunks and decompress them on-the-fly when necessary.
   std::unique_ptr<Iterator> iterator() const;
 
-  size_t bytes() const { return Documents.size() * sizeof(DocID); }
+  /// Returns in-memory size.
+  size_t bytes() const { return Chunks.capacity() * sizeof(Chunk); }
 
 private:
-  const std::vector<DocID> Documents;
+  const llvm::SmallVector<Chunk, 1> Chunks;
 };
 
 } // namespace dex
Index: clang-tools-extra/clangd/index/dex/PostingList.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/PostingList.cpp
+++ clang-tools-extra/clangd/index/dex/PostingList.cpp
@@ -9,6 +9,9 @@
 
 #include "PostingList.h"
 #include "Iterator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <queue>
 
 namespace clang {
 namespace clangd {
@@ -16,21 +19,31 @@
 
 namespace {
 
-/// Implements Iterator over std::vector<DocID>. This is the most basic
-/// iterator and is simply a wrapper around
-/// std::vector<DocID>::const_iterator.
-class PlainIterator : public Iterator {
+/// Implements iterator of PostingList chunks. This requires iterating over two
+/// levels: the first level iterates over the chunks and decompresses them
+/// on-the-fly, the second level iterates over the decompressed DocIDs.
+class ChunkIterator : public Iterator {
 public:
-  explicit PlainIterator(llvm::ArrayRef<DocID> Documents)
-      : Documents(Documents), Index(std::begin(Documents)) {}
+  explicit ChunkIterator(llvm::ArrayRef<Chunk> Chunks)
+      : Chunks(Chunks), CurrentChunk(Chunks.begin()) {
+    if (!Chunks.empty()) {
+      DecompressedChunk = CurrentChunk->decompress();
+      CurrentID = DecompressedChunk.begin();
+    }
+  }
 
-  bool reachedEnd() const override { return Index == std::end(Documents); }
+  bool reachedEnd() const override { return CurrentChunk == Chunks.end(); }
 
   /// Advances cursor to the next item.
   void advance() override {
     assert(!reachedEnd() &&
            "Posting List iterator can't advance() at the end.");
-    ++Index;
+    if (++CurrentID == DecompressedChunk.end()) { // reached end of chunk
+      if (++CurrentChunk == Chunks.end())         // reached the end
+        return;
+      DecompressedChunk = CurrentChunk->decompress();
+      CurrentID = DecompressedChunk.begin();
+    }
   }
 
   /// Applies binary search to advance cursor to the next item with DocID
@@ -38,16 +51,26 @@
   void advanceTo(DocID ID) override {
     assert(!reachedEnd() &&
            "Posting List iterator can't advance() at the end.");
-    // If current ID is beyond requested one, iterator is already in the right
-    // state.
-    if (peek() < ID)
-      Index = std::lower_bound(Index, std::end(Documents), ID);
+    if (ID <= peek())
+      return;
+    advanceToChunk(ID);
+    // Try to find ID within current chunk.
+    CurrentID = std::lower_bound(CurrentID, std::end(DecompressedChunk), ID);
+    // Return if the position was found in current chunk.
+    if (CurrentID != std::end(DecompressedChunk))
+      return;
+    // Otherwise, the iterator should point to the first element of the next
+    // chunk (if there is any).
+    ++CurrentChunk;
+    if (CurrentChunk == Chunks.end())
+      return;
+    DecompressedChunk = CurrentChunk->decompress();
+    CurrentID = DecompressedChunk.begin();
   }
 
   DocID peek() const override {
-    assert(!reachedEnd() &&
-           "Posting List iterator can't peek() at the end.");
-    return *Index;
+    assert(!reachedEnd() && "Posting List iterator can't peek() at the end.");
+    return *CurrentID;
   }
 
   float consume() override {
@@ -56,27 +79,154 @@
     return DEFAULT_BOOST_SCORE;
   }
 
-  size_t estimateSize() const override { return Documents.size(); }
+  size_t estimateSize() const override {
+    return Chunks.size() * ApproxEntriesPerChunk;
+  }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
     OS << '[';
-    if (Index != std::end(Documents))
-      OS << *Index;
-    else
-      OS << "END";
+    if (CurrentChunk != Chunks.begin() ||
+        (CurrentID != DecompressedChunk.begin() && !DecompressedChunk.empty()))
+      OS << "... ";
+    OS << (reachedEnd() ? "END" : std::to_string(*CurrentID));
+    if (!reachedEnd() && CurrentID < DecompressedChunk.end() - 1)
+      OS << " ...";
     OS << ']';
     return OS;
   }
 
-  llvm::ArrayRef<DocID> Documents;
-  llvm::ArrayRef<DocID>::const_iterator Index;
+  /// Advances CurrentChunk to the chunk which might contain ID.
+  void advanceToChunk(DocID ID) {
+    if ((CurrentChunk != Chunks.end() - 1) &&
+        ((CurrentChunk + 1)->Head <= ID)) {
+      // Find the first chunk with Head > ID; the one before it may contain ID.
+      CurrentChunk = std::lower_bound(
+          CurrentChunk + 1, Chunks.end(), ID,
+          [](const Chunk &C, const DocID ID) { return C.Head <= ID; });
+      --CurrentChunk;
+      DecompressedChunk = CurrentChunk->decompress();
+      CurrentID = DecompressedChunk.begin();
+    }
+  }
+
+  llvm::ArrayRef<Chunk> Chunks;
+  // Iterator over chunks.
+  decltype(Chunks)::const_iterator CurrentChunk;
+  llvm::SmallVector<DocID, Chunk::PayloadSize + 1> DecompressedChunk;
+  // Iterator over DecompressedChunk.
+  decltype(DecompressedChunk)::iterator CurrentID;
+
+  static constexpr size_t ApproxEntriesPerChunk = 15;
 };
 
+/// Writes a variable length DocID into the buffer and updates the buffer size.
+/// If it doesn't fit, returns false and doesn't write to the buffer.
+bool encodeVByte(DocID Delta, llvm::MutableArrayRef<uint8_t> &Payload) {
+  // Calculate number of bytes Delta encoding would take by rounding up number
+  // of meaningful bytes divided by Chunk::BitsPerEncodingByte.
+  size_t Width = (sizeof(DocID) * 8 - llvm::countLeadingZeros(Delta) +
+                  Chunk::BitsPerEncodingByte - 1) /
+                 Chunk::BitsPerEncodingByte;
+  if (Width > Payload.size())
+    return false;
+
+  static constexpr size_t MaxBytes = 5;
+  for (size_t I = 0; I < MaxBytes; ++I) {
+    DocID Mask = (I + 1 != MaxBytes ? 0x7f : 0xf)
+                 << Chunk::BitsPerEncodingByte * I;
+    uint8_t Encoding = (Delta & Mask) >> (Chunk::BitsPerEncodingByte * I);
+    bool HasNextByte = I + 1 != Width;
+    // If there is next byte, apply continuation bit.
+    Payload.front() = HasNextByte ? Encoding | 0x80 : Encoding;
+    Payload = Payload.drop_front();
+    if (!HasNextByte)
+      break;
+  }
+  return true;
+}
+
+/// Use Variable-length Byte (VByte) delta encoding to compress sorted list of
+/// DocIDs. The compression stores deltas (differences) between subsequent
+/// DocIDs and encodes these deltas utilizing the least possible number of
+/// bytes.
+///
+/// Each encoding byte consists of two parts: the first bit (continuation bit)
+/// indicates whether this is the last byte (0 if this byte is the last) of
+/// current encoding and seven bits a piece of DocID (payload). DocID contains
+/// 32 bits and therefore it takes up to 5 bytes to encode it (4 full 7-bit
+/// payloads and one 4-bit payload), but in practice it is expected that gaps
+/// (deltas) between subsequent DocIDs are not large enough to require 5 bytes.
+/// In very dense posting lists (with average gaps less than 128) this
+/// representation would be 4 times more efficient than raw DocID array.
+///
+/// PostingList encoding example (the least significant 7 bits come first):
+///
+/// DocIDs    42            47        7000
+/// gaps                    5         6958
+/// Encoding  (raw number)  00000101  10101110 00110110
+llvm::SmallVector<Chunk, 1> encodeStream(llvm::ArrayRef<DocID> Documents) {
+  assert(!Documents.empty() && "Can't encode empty sequence.");
+  llvm::SmallVector<Chunk, 1> Result;
+  // Every chunk starts with an uncompressed Head; the Payload of a fresh Chunk
+  // is zero-filled, which is what marks the end of the encoded deltas.
+  Result.emplace_back();
+  Result.back().Head = Documents.front();
+  DocID Last = Documents.front();
+  llvm::MutableArrayRef<uint8_t> RemainingPayload(Result.back().Payload);
+  for (DocID Doc : Documents.drop_front()) {
+    // A delta that doesn't fit leaves the payload untouched, so the document
+    // simply becomes the Head of a new chunk. This also holds for the very
+    // last document, which therefore can never be dropped.
+    if (!encodeVByte(Doc - Last, RemainingPayload)) {
+      Result.emplace_back();
+      Result.back().Head = Doc;
+      RemainingPayload = Result.back().Payload;
+    }
+    Last = Doc;
+  }
+  return llvm::SmallVector<Chunk, 1>(Result); // no move, shrink-to-fit
+}
+
+/// Reads a variable length DocID from Bytes and drops the consumed bytes.
+/// Returns None if Bytes is empty or starts with the zero terminator byte.
+llvm::Optional<DocID> readVByte(llvm::ArrayRef<uint8_t> &Bytes) {
+  if (Bytes.empty() || Bytes.front() == 0)
+    return llvm::None;
+  DocID Result = 0;
+  bool HasNextByte = true;
+  for (size_t Length = 0; HasNextByte && !Bytes.empty(); ++Length) {
+    assert(Length < 5 && "Malformed VByte encoding sequence.");
+    // Widen to DocID before shifting so that the fifth byte can't overflow int.
+    Result |= static_cast<DocID>(Bytes.front() & 0x7f)
+              << (Chunk::BitsPerEncodingByte * Length);
+    HasNextByte = (Bytes.front() & 0x80) != 0;
+    Bytes = Bytes.drop_front();
+  }
+  return Result;
+}
+
 } // namespace
 
+llvm::SmallVector<DocID, Chunk::PayloadSize + 1> Chunk::decompress() const {
+  llvm::SmallVector<DocID, Chunk::PayloadSize + 1> Result{Head};
+  llvm::ArrayRef<uint8_t> Bytes(Payload);
+  DocID Delta;
+  for (DocID Current = Head; !Bytes.empty(); Current += Delta) {
+    auto MaybeDelta = readVByte(Bytes);
+    if (!MaybeDelta)
+      break;
+    Delta = *MaybeDelta;
+    Result.push_back(Current + Delta);
+  }
+  return llvm::SmallVector<DocID, Chunk::PayloadSize + 1>{Result};
+}
+
+PostingList::PostingList(llvm::ArrayRef<DocID> Documents)
+    : Chunks(encodeStream(Documents)) {}
+
 std::unique_ptr<Iterator> PostingList::iterator() const {
-  return llvm::make_unique<PlainIterator>(Documents);
+  return llvm::make_unique<ChunkIterator>(Chunks);
 }
 
 } // namespace dex
Index: clang-tools-extra/clangd/index/dex/fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/fuzzer/CMakeLists.txt
@@ -0,0 +1,19 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)
+
+set(LLVM_LINK_COMPONENTS Support)
+
+if(LLVM_USE_SANITIZE_COVERAGE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer")
+endif()
+
+add_clang_executable(clangd-vbyte-fuzzer
+  EXCLUDE_FROM_ALL
+  VByteFuzzer.cpp
+  )
+
+target_link_libraries(clangd-vbyte-fuzzer
+  PRIVATE
+  clangBasic
+  clangDaemon
+  ${LLVM_LIB_FUZZING_ENGINE}
+  )
Index: clang-tools-extra/clangd/index/dex/fuzzer/VByteFuzzer.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/fuzzer/VByteFuzzer.cpp
@@ -0,0 +1,64 @@
+//===-- VByteFuzzer.cpp - Fuzz VByte Posting List encoding ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a function that round-trips a list of DocIDs
+/// through PostingList. This function is then linked into the Fuzzer library.
+///
+//===----------------------------------------------------------------------===//
+
+#include "../Iterator.h"
+#include "../PostingList.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <vector>
+
+using DocID = clang::clangd::dex::DocID;
+
+/// Transform raw byte sequence into list of DocIDs (4 bytes each, low first).
+std::vector<DocID> generateDocuments(uint8_t *Data, size_t Size) {
+  std::vector<DocID> Result;
+  DocID ID = 0;
+  for (size_t I = 0; I < Size; ++I) {
+    size_t Offset = I % 4;
+    if (Offset == 0 && I != 0) {
+      Result.push_back(ID); // Emit the completed ID before starting the next.
+      ID = 0;
+    }
+    ID |= static_cast<DocID>(Data[I]) << (Offset * 8);
+  }
+  if (Size != 0)
+    Result.push_back(ID);
+  return Result;
+}
+
+/// This fuzzer checks that compressed PostingList contents can be successfully
+/// decoded into the original sequence.
+extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) {
+  if (Size == 0)
+    return 0;
+  const auto OriginalDocuments = generateDocuments(Data, Size);
+  if (OriginalDocuments.empty())
+    return 0;
+  // Ensure that given sequence of DocIDs is sorted.
+  for (size_t I = 1; I < OriginalDocuments.size(); ++I)
+    if (OriginalDocuments[I] <= OriginalDocuments[I - 1])
+      return 0;
+  const clang::clangd::dex::PostingList List(OriginalDocuments);
+  const auto DecodedDocuments = clang::clangd::dex::consume(*List.iterator());
+  // Compare decoded sequence against the original PostingList contents.
+  if (DecodedDocuments.size() != OriginalDocuments.size())
+    LLVM_BUILTIN_TRAP;
+  for (size_t I = 0; I < DecodedDocuments.size(); ++I)
+    if (DecodedDocuments[I].first != OriginalDocuments[I])
+      LLVM_BUILTIN_TRAP;
+  return 0;
+}
Index: clang-tools-extra/unittests/clangd/DexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -69,19 +69,6 @@
   EXPECT_TRUE(DocIterator->reachedEnd());
 }
 
-TEST(DexIterators, AndWithEmpty) {
-  const PostingList L0({});
-  const PostingList L1({0, 5, 7, 10, 42, 320, 9000});
-
-  auto AndEmpty = createAnd(L0.iterator());
-  EXPECT_TRUE(AndEmpty->reachedEnd());
-
-  auto AndWithEmpty = createAnd(L0.iterator(), L1.iterator());
-  EXPECT_TRUE(AndWithEmpty->reachedEnd());
-
-  EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
-}
-
 TEST(DexIterators, AndTwoLists) {
   const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
   const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
@@ -120,20 +107,6 @@
   EXPECT_TRUE(And->reachedEnd());
 }
 
-TEST(DexIterators, OrWithEmpty) {
-  const PostingList L0({});
-  const PostingList L1({0, 5, 7, 10, 42, 320, 9000});
-
-  auto OrEmpty = createOr(L0.iterator());
-  EXPECT_TRUE(OrEmpty->reachedEnd());
-
-  auto OrWithEmpty = createOr(L0.iterator(), L1.iterator());
-  EXPECT_FALSE(OrWithEmpty->reachedEnd());
-
-  EXPECT_THAT(consumeIDs(*OrWithEmpty),
-              ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
-}
-
 TEST(DexIterators, OrTwoLists) {
   const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
   const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
@@ -211,29 +184,27 @@
   //      |And Iterator: 1, 5, 9|              |Or Iterator: 0, 1, 3, 5|
   //      +----------+----------+              +----------+------------+
   //                 |                                    |
-  //          +------+-----+                    +---------------------+
-  //          |            |                    |         |           |
-  //  +-------v-----+ +----+---+             +--v--+  +---v----+ +----v---+
-  //  |1, 3, 5, 8, 9| |Boost: 2|             |Empty|  |Boost: 3| |Boost: 4|
-  //  +-------------+ +----+---+             +-----+  +---+----+ +----+---+
-  //                       |                              |           |
-  //                  +----v-----+                      +-v--+    +---v---+
-  //                  |1, 5, 7, 9|                      |1, 5|    |0, 3, 5|
-  //                  +----------+                      +----+    +-------+
+  //          +------+-----+                        ------------+
+  //          |            |                        |           |
+  //  +-------v-----+ +----+---+                +---v----+ +----v---+
+  //  |1, 3, 5, 8, 9| |Boost: 2|                |Boost: 3| |Boost: 4|
+  //  +-------------+ +----+---+                +---+----+ +----+---+
+  //                       |                        |           |
+  //                  +----v-----+                +-v--+    +---v---+
+  //                  |1, 5, 7, 9|                |1, 5|    |0, 3, 5|
+  //                  +----------+                +----+    +-------+
   //
   const PostingList L0({1, 3, 5, 8, 9});
   const PostingList L1({1, 5, 7, 9});
-  const PostingList L3({});
-  const PostingList L4({1, 5});
-  const PostingList L5({0, 3, 5});
+  const PostingList L2({1, 5});
+  const PostingList L3({0, 3, 5});
 
   // Root of the query tree: [1, 5]
   auto Root = createAnd(
       // Lower And Iterator: [1, 5, 9]
       createAnd(L0.iterator(), createBoost(L1.iterator(), 2U)),
       // Lower Or Iterator: [0, 1, 5]
-      createOr(L3.iterator(), createBoost(L4.iterator(), 3U),
-               createBoost(L5.iterator(), 4U)));
+      createOr(createBoost(L2.iterator(), 3U), createBoost(L3.iterator(), 4U)));
 
   EXPECT_FALSE(Root->reachedEnd());
   EXPECT_EQ(Root->peek(), 1U);
@@ -260,15 +231,13 @@
   const PostingList L2({1, 5, 7, 9});
   const PostingList L3({0, 5});
   const PostingList L4({0, 1, 5});
-  const PostingList L5({});
-
-  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4]");
-
-  auto Nested =
-      createAnd(createAnd(L1.iterator(), L2.iterator()),
-                createOr(L3.iterator(), L4.iterator(), L5.iterator()));
 
-  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))");
+  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4 ...]");
+  auto It = L0.iterator();
+  It->advanceTo(19);
+  EXPECT_EQ(llvm::to_string(*It), "[... 20 ...]");
+  It->advanceTo(9000);
+  EXPECT_EQ(llvm::to_string(*It), "[... END]");
 }
 
 TEST(DexIterators, Limit) {