diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -156,6 +156,7 @@
   bool disableVerify;
   bool ehFrameHdr;
   bool emitLLVM;
+  uint16_t emitLLVMFlushThreshold;
   bool emitRelocs;
   bool enableNewDtags;
   bool executeOnly;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -930,6 +930,8 @@
   config->ehFrameHdr =
       args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
   config->emitLLVM = args.hasArg(OPT_plugin_opt_emit_llvm, false);
+  config->emitLLVMFlushThreshold =
+      args::getInteger(args, OPT_plugin_opt_emit_llvm_flush_threshold, 512);
   config->emitRelocs = args.hasArg(OPT_emit_relocs);
   config->callGraphProfileSort = args.hasFlag(
       OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -57,6 +57,19 @@
   return ret;
 }
 
+// The merged bitcode after LTO is large, so first try a file stream that can
+// read, seek and write: it lets BitcodeWriter flush buffered data early and
+// reduces memory consumption. Otherwise fall back to a write-only stream.
+static std::unique_ptr<raw_fd_ostream> openLTOOutputFile(StringRef file) {
+  std::error_code ec;
+  std::unique_ptr<raw_fd_ostream> seekable =
+      std::make_unique<raw_fd_stream>(file, ec);
+  // The read/seek/write stream could not be set up; use openFile() instead.
+  if (ec)
+    return openFile(file);
+  return seekable;
+}
+
 static std::string getThinLTOOutputFile(StringRef modulePath) {
   return lto::getThinLTOOutputFile(
       std::string(modulePath), std::string(config->thinLTOPrefixReplace.first),
@@ -151,8 +164,10 @@
 
   if (config->emitLLVM) {
     c.PostInternalizeModuleHook = [](size_t task, const Module &m) {
-      if (std::unique_ptr<raw_fd_ostream> os = openFile(config->outputFile))
-        WriteBitcodeToFile(m, *os, false);
+      if (std::unique_ptr<raw_fd_ostream> os =
+              openLTOOutputFile(config->outputFile))
+        WriteBitcodeToFile(m, *os, false, nullptr, false, nullptr,
+                           config->emitLLVMFlushThreshold);
       return false;
     };
   }
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -581,6 +581,9 @@
 def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">,
   Alias<lto_emit_asm>, HelpText<"Alias for --lto-emit-asm">;
 def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">;
+def plugin_opt_emit_llvm_flush_threshold:
+  JJ<"plugin-opt=emit-llvm-flush-threshold=">,
+  HelpText<"The threshold (unit M) for flushing LLVM bitcode. 512 by default.">;
 def: J<"plugin-opt=jobs=">, Alias<thinlto_jobs>, HelpText<"Alias for --thinlto-jobs">;
 def: J<"plugin-opt=lto-partitions=">, Alias<lto_partitions>, HelpText<"Alias for --lto-partitions">;
 def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">;
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -47,7 +47,8 @@
 
   public:
     /// Create a BitcodeWriter that writes to Buffer.
-    BitcodeWriter(SmallVectorImpl<char> &Buffer);
+    BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr,
+                  uint16_t FlushThreshold = 512);
 
     ~BitcodeWriter();
 
@@ -126,11 +127,15 @@
   /// Can be used to produce the same module hash for a minimized bitcode
   /// used just for the thin link as in the regular full bitcode that will
   /// be used in the backend.
+  ///
+  /// If \p Out is a raw_fd_stream, \p FlushThreshold is the buffered size, in
+  /// megabytes, at which bitcode is flushed to \p Out.
   void WriteBitcodeToFile(const Module &M, raw_ostream &Out,
                           bool ShouldPreserveUseListOrder = false,
                           const ModuleSummaryIndex *Index = nullptr,
                           bool GenerateHash = false,
-                          ModuleHash *ModHash = nullptr);
+                          ModuleHash *ModHash = nullptr,
+                          uint16_t FlushThreshold = 512);
 
   /// Write the specified thin link bitcode file (i.e., the minimized bitcode
   /// file) to the given raw output stream, where it will be written in a new
diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -20,17 +20,27 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace llvm {
 
 class BitstreamWriter {
+  /// Out - The buffer that keeps unflushed bytes.
   SmallVectorImpl<char> &Out;
 
+  /// FS - The file stream that Out flushes to. FS is nullptr when the output
+  /// cannot read or seek; Out is then not flushed until all data are written.
+  raw_fd_stream *FS;
+
+  /// FlushThreshold - If FS is valid, Out is flushed to FS once it holds at
+  /// least this many bytes.
+  const uint64_t FlushThreshold;
+
   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
   unsigned CurBit;
 
-  /// CurValue - The current value.  Only bits < CurBit are valid.
+  /// CurValue - The current value. Only bits < CurBit are valid.
   uint32_t CurValue;
 
   /// CurCodeSize - This is the declared size of code values used for the
@@ -64,15 +74,19 @@
 
   void WriteByte(unsigned char Value) {
     Out.push_back(Value);
+    FlushToFile(); // Spills Out to FS once the flush threshold is reached.
   }
 
   void WriteWord(unsigned Value) {
     Value = support::endian::byte_swap<uint32_t, support::little>(Value);
     Out.append(reinterpret_cast<const char *>(&Value),
                reinterpret_cast<const char *>(&Value + 1));
+    FlushToFile(); // Spills Out to FS once the flush threshold is reached.
   }
 
-  size_t GetBufferOffset() const { return Out.size(); }
+  uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
+
+  size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
 
   size_t GetWordIndex() const {
     size_t Offset = GetBufferOffset();
@@ -80,9 +94,24 @@
     return Offset / 4;
   }
 
+  /// Spill the pending bytes in Out to the file stream FS and empty Out.
+  /// Nothing happens when FS is null or while Out is below FlushThreshold.
+  void FlushToFile() {
+    // Without a file stream everything stays buffered in Out; with one,
+    // buffering continues until Out reaches the threshold.
+    const bool ReadyToFlush = FS && Out.size() >= FlushThreshold;
+    if (!ReadyToFlush)
+      return;
+
+    FS->write((char *)&Out.front(), Out.size());
+    Out.clear();
+  }
+
 public:
-  explicit BitstreamWriter(SmallVectorImpl<char> &O)
-    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+  BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
+                  uint16_t FlushThreshold = 512)
+      : Out(O), FS(FS), FlushThreshold(uint64_t(FlushThreshold) << 20),
+        CurBit(0), CurValue(0), CurCodeSize(2) {}
 
   ~BitstreamWriter() {
     assert(CurBit == 0 && "Unflushed data remaining");
@@ -104,11 +133,59 @@
   void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
     using namespace llvm::support;
     uint64_t ByteNo = BitNo / 8;
+    uint64_t StartBit = BitNo & 7;
+    uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes();
+
+    // The word is still buffered in Out: patch it in memory.
+    if (ByteNo >= NumOfFlushedBytes) {
+      assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+                 &Out[ByteNo - NumOfFlushedBytes], StartBit)) &&
+             "Expected to be patching over 0-value placeholders");
+      endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+          &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit);
+      return;
+    }
+
+    // If the byte offset to backpatch is flushed, use seek to backfill data.
+    // First, save the file position to restore later.
+    uint64_t CurPos = FS->tell();
+
+    // Stage the bytes to patch in Bytes, taken from the file FS and the buffer
+    // Out. To reduce disk access, non-debug builds skip this read when the
+    // patch is byte-aligned: nothing is checked and all bytes are overwritten.
+    char Bytes[8];
+    size_t BytesNum = StartBit ? 8 : 4;
+    size_t BytesFromDisk =
+        std::min(static_cast<uint64_t>(BytesNum), NumOfFlushedBytes - ByteNo);
+    size_t BytesFromBuffer = BytesNum - BytesFromDisk;
+
+#ifdef NDEBUG
+    if (StartBit)
+#endif
+    {
+      FS->seek(ByteNo);
+      ssize_t BytesRead = FS->read(Bytes, BytesFromDisk);
+      (void)BytesRead;
+      assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk);
+      for (size_t i = 0; i < BytesFromBuffer; ++i)
+        Bytes[BytesFromDisk + i] = Out[i];
+    }
+
+    // Update Bytes in terms of bit offset and value.
     assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
-               &Out[ByteNo], BitNo & 7)) &&
+               Bytes, StartBit)) &&
            "Expected to be patching over 0-value placeholders");
-    endian::writeAtBitAlignment<uint32_t, little, unaligned>(
-        &Out[ByteNo], NewWord, BitNo & 7);
+    endian::writeAtBitAlignment<uint32_t, little, unaligned>(Bytes, NewWord,
+                                                             StartBit);
+
+    // Copy updated data back to the file FS and the buffer Out.
+    FS->seek(ByteNo);
+    FS->write(Bytes, BytesFromDisk);
+    for (size_t i = 0; i < BytesFromBuffer; ++i)
+      Out[i] = Bytes[BytesFromDisk + i];
+
+    // Restore the file position.
+    FS->seek(CurPos);
   }
 
   void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4453,8 +4453,9 @@
   Stream.Emit(0xD, 4);
 }
 
-BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
-    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS,
+                             uint16_t FlushThreshold)
+    : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, FlushThreshold)) {
   writeBitcodeHeader(*Stream);
 }
 
@@ -4555,7 +4556,8 @@
 void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out,
                               bool ShouldPreserveUseListOrder,
                               const ModuleSummaryIndex *Index,
-                              bool GenerateHash, ModuleHash *ModHash) {
+                              bool GenerateHash, ModuleHash *ModHash,
+                              uint16_t FlushThreshold) {
   SmallVector<char, 0> Buffer;
   Buffer.reserve(256*1024);
 
@@ -4565,7 +4567,7 @@
   if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
     Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
 
-  BitcodeWriter Writer(Buffer);
+  BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out), FlushThreshold);
   Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
                      ModHash);
   Writer.writeSymtab();
@@ -4575,7 +4577,9 @@
     emitDarwinBCHeaderAndTrailer(Buffer, TT);
 
   // Write the generated bitstream to "Out".
-  Out.write((char*)&Buffer.front(), Buffer.size());
+  if (!Buffer.empty()) {
+    Out.write((char *)&Buffer.front(), Buffer.size());
+  }
 }
 
 void IndexBitcodeWriter::write() {