diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -57,6 +57,19 @@
   return ret;
 }
 
+// The merged bitcode after LTO is large. Try openning a file stream that
+// supports reading, seeking and writing. Such a file allows BitcodeWriter to
+// flush buffered data to reduce memory comsuption. If this fails, open a file
+// stream that supports only write.
+static std::unique_ptr<raw_fd_ostream> openLTOOutputFile(StringRef file) {
+  std::error_code ec;
+  std::unique_ptr<raw_fd_ostream> fs =
+      std::make_unique<raw_fd_stream>(file, ec);
+  if (!ec)
+    return fs;
+  return openFile(file);
+}
+
 static std::string getThinLTOOutputFile(StringRef modulePath) {
   return lto::getThinLTOOutputFile(
       std::string(modulePath), std::string(config->thinLTOPrefixReplace.first),
@@ -151,7 +164,8 @@
 
   if (config->emitLLVM) {
     c.PostInternalizeModuleHook = [](size_t task, const Module &m) {
-      if (std::unique_ptr<raw_fd_ostream> os = openFile(config->outputFile))
+      if (std::unique_ptr<raw_fd_ostream> os =
+              openLTOOutputFile(config->outputFile))
         WriteBitcodeToFile(m, *os, false);
       return false;
     };
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -47,7 +47,7 @@
 
   public:
     /// Create a BitcodeWriter that writes to Buffer.
-    BitcodeWriter(SmallVectorImpl<char> &Buffer);
+    BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
 
     ~BitcodeWriter();
 
diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -20,17 +20,27 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace llvm {
 
 class BitstreamWriter {
+  /// Out - The buffer that keeps unflushed bytes.
   SmallVectorImpl<char> &Out;
 
+  /// FS - The file stream that Out flushes to. If FS is nullptr, it does not
+  /// support read or seek, Out cannot be flushed until all data are written.
+  raw_fd_stream *FS;
+
+  /// FlushThreshold - If FS is valid, this is the threshold (unit B) to flush
+  /// FS.
+  const uint64_t FlushThreshold;
+
   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
   unsigned CurBit;
 
-  /// CurValue - The current value.  Only bits < CurBit are valid.
+  /// CurValue - The current value. Only bits < CurBit are valid.
   uint32_t CurValue;
 
   /// CurCodeSize - This is the declared size of code values used for the
@@ -64,15 +74,19 @@
 
   void WriteByte(unsigned char Value) {
     Out.push_back(Value);
+    FlushToFile();
   }
 
   void WriteWord(unsigned Value) {
     Value = support::endian::byte_swap<uint32_t, support::little>(Value);
     Out.append(reinterpret_cast<const char *>(&Value),
                reinterpret_cast<const char *>(&Value + 1));
+    FlushToFile();
   }
 
-  size_t GetBufferOffset() const { return Out.size(); }
+  uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
+
+  size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
 
   size_t GetWordIndex() const {
     size_t Offset = GetBufferOffset();
@@ -80,9 +94,29 @@
     return Offset / 4;
   }
 
+  /// If the related file stream supports reading, seeking and writing, flush
+  /// the buffer if its size is above a threshold.
+  void FlushToFile() {
+    if (!FS)
+      return;
+    if (Out.size() < FlushThreshold)
+      return;
+    FS->write((char *)&Out.front(), Out.size());
+    Out.clear();
+  }
+
 public:
-  explicit BitstreamWriter(SmallVectorImpl<char> &O)
-    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+  /// Create a BitstreamWriter that writes to Buffer \p O.
+  ///
+  /// \p FS is the file stream that \p O flushes to incrementally. If \p FS is
+  /// null, \p O does not flush incrementially, but writes to disk at the end.
+  ///
+  /// \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is
+  /// valid.
+  BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
+                  uint32_t FlushThreshold = 512)
+      : Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0),
+        CurValue(0), CurCodeSize(2) {}
 
   ~BitstreamWriter() {
     assert(CurBit == 0 && "Unflushed data remaining");
@@ -104,11 +138,59 @@
   void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
     using namespace llvm::support;
     uint64_t ByteNo = BitNo / 8;
-    assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
-               &Out[ByteNo], BitNo & 7)) &&
-           "Expected to be patching over 0-value placeholders");
-    endian::writeAtBitAlignment<uint32_t, little, unaligned>(
-        &Out[ByteNo], NewWord, BitNo & 7);
+    uint64_t StartBit = BitNo & 7;
+    uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes();
+
+    if (ByteNo >= NumOfFlushedBytes) {
+      assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+                 &Out[ByteNo - NumOfFlushedBytes], StartBit)) &&
+             "Expected to be patching over 0-value placeholders");
+      endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+          &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit);
+      return;
+    }
+
+    // If the byte offset to backpatch is flushed, use seek to backfill data.
+    // First, save the file position to restore later.
+    uint64_t CurPos = FS->tell();
+
+    // Copy data to update into Bytes from the file FS and the buffer Out.
+    char Bytes[8];
+    size_t BytesNum = StartBit ? 8 : 4;
+    size_t BytesFromDisk = std::min(BytesNum, NumOfFlushedBytes - ByteNo);
+    size_t BytesFromBuffer = BytesNum - BytesFromDisk;
+
+    // When unaligned, copy existing data into Bytes from the file FS and the
+    // buffer Out so that it can be updated before writing. For debug builds
+    // read bytes unconditionally in order to check that the existing value is 0
+    // as expected.
+#ifdef NDEBUG
+    if (StartBit)
+#endif
+    {
+      FS->seek(ByteNo);
+      ssize_t BytesRead = FS->read(Bytes, BytesFromDisk);
+      (void)BytesRead; // silence warning
+      assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk);
+      for (size_t i = 0; i < BytesFromBuffer; ++i)
+        Bytes[BytesFromDisk + i] = Out[i];
+      assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+                 Bytes, StartBit)) &&
+             "Expected to be patching over 0-value placeholders");
+    }
+
+    // Update Bytes in terms of bit offset and value.
+    endian::writeAtBitAlignment<uint32_t, little, unaligned>(Bytes, NewWord,
+                                                             StartBit);
+
+    // Copy updated data back to the file FS and the buffer Out.
+    FS->seek(ByteNo);
+    FS->write(Bytes, BytesFromDisk);
+    for (size_t i = 0; i < BytesFromBuffer; ++i)
+      Out[i] = Bytes[BytesFromDisk + i];
+
+    // Restore the file position.
+    FS->seek(CurPos);
   }
 
   void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -86,6 +86,9 @@
     IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
                    cl::desc("Number of metadatas above which we emit an index "
                             "to enable lazy-loading"));
+static cl::opt<uint32_t> FlushThreshold(
+    "bitcode-flush-threshold", cl::Hidden, cl::init(512),
+    cl::desc("The threshold (unit M) for flushing LLVM bitcode."));
 
 static cl::opt<bool> WriteRelBFToSummary(
     "write-relbf-to-summary", cl::Hidden, cl::init(false),
@@ -4453,8 +4456,8 @@
   Stream.Emit(0xD, 4);
 }
 
-BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
-    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS)
+    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, FlushThreshold)) {
   writeBitcodeHeader(*Stream);
 }
 
@@ -4565,7 +4568,7 @@
   if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
     Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
 
-  BitcodeWriter Writer(Buffer);
+  BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out));
   Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
                      ModHash);
   Writer.writeSymtab();
@@ -4575,7 +4578,8 @@
     emitDarwinBCHeaderAndTrailer(Buffer, TT);
 
   // Write the generated bitstream to "Out".
-  Out.write((char*)&Buffer.front(), Buffer.size());
+  if (!Buffer.empty())
+    Out.write((char *)&Buffer.front(), Buffer.size());
 }
 
 void IndexBitcodeWriter::write() {