diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -265,16 +265,25 @@
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
 
   for (const Section &sec : sections) {
-    if (config->mergeLiterals && sectionType(sec.flags) == S_CSTRING_LITERALS) {
+    if (config->mergeLiterals &&
+        (sectionType(sec.flags) == S_CSTRING_LITERALS ||
+         isWordLiteralSection(sec.flags))) {
       if (sec.nreloc)
         fatal(toString(this) + " contains relocations in " + sec.segname + "," +
               sec.sectname +
               ", so LLD cannot do literal merging. Try re-running with "
               "--no-literal-merge.");
 
-      auto *isec = make<CStringInputSection>();
-      parseSection(this, buf, sec, isec);
-      isec->splitIntoPieces(); // FIXME: parallelize this?
+      InputSection *isec;
+      if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
+        isec = make<CStringInputSection>();
+        parseSection(this, buf, sec, isec);
+        // FIXME: parallelize this?
+        cast<CStringInputSection>(isec)->splitIntoPieces();
+      } else {
+        isec = make<WordLiteralInputSection>();
+        parseSection(this, buf, sec, isec);
+      }
       subsections.push_back({{0, isec}});
     } else {
       auto *isec = make<ConcatInputSection>();
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -27,6 +27,7 @@
   enum Kind {
     ConcatKind,
     CStringLiteralKind,
+    WordLiteralKind,
   };
 
   Kind kind() const { return sectionKind; }
@@ -141,6 +142,25 @@
   std::vector<StringPiece> pieces;
 };
 
+// An input section holding fixed-width (4-, 8- or 16-byte) literals. Offsets
+// into it are resolved through its parent WordLiteralSection, so equal literal
+// values from different inputs all map to the same output location.
+class WordLiteralInputSection : public InputSection {
+public:
+  WordLiteralInputSection() : InputSection(WordLiteralKind) {}
+
+  // LLVM-style RTTI hook; lets isa<>/cast<>/dyn_cast<> identify this class.
+  static bool classof(const InputSection *section) {
+    return section->kind() == WordLiteralKind;
+  }
+
+  // Offset, within the parent output section, of the literal found `off`
+  // bytes into this input section.
+  uint64_t getOffset(uint64_t off) const override;
+  // getOffset(off) plus the parent output section's `fileOff`.
+  uint64_t getFileOffset(uint64_t off) const override;
+};
+
 inline uint8_t sectionType(uint32_t flags) {
   return flags & llvm::MachO::SECTION_TYPE;
 }
@@ -164,6 +176,19 @@
          llvm::MachO::S_ATTR_DEBUG;
 }
 
+// Returns true if the section type encoded in `flags` is one of the
+// fixed-width literal types (4-, 8- or 16-byte literals).
+inline bool isWordLiteralSection(uint32_t flags) {
+  switch (sectionType(flags)) {
+  case llvm::MachO::S_4BYTE_LITERALS:
+  case llvm::MachO::S_8BYTE_LITERALS:
+  case llvm::MachO::S_16BYTE_LITERALS:
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool isCodeSection(const InputSection *);
 
 extern std::vector<InputSection *> inputSections;
@@ -192,6 +210,7 @@
 constexpr const char const_[] = "__const";
 constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
 constexpr const char lazyBinding[] = "__lazy_binding";
+constexpr const char literals[] = "__literals";
 constexpr const char moduleInitFunc[] = "__mod_init_func";
 constexpr const char moduleTermFunc[] = "__mod_term_func";
 constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -127,6 +127,30 @@
   return piece.outSecOff + addend;
 }
 
+// Adds the parent output section's `fileOff` to the offset that getOffset()
+// reports for `off`.
+uint64_t WordLiteralInputSection::getFileOffset(uint64_t off) const {
+  const uint64_t outSecOff = getOffset(off);
+  return parent->fileOff + outSecOff;
+}
+
+// Maps an offset within this input section to the offset, within the parent
+// WordLiteralSection, of the literal with the same value. The lookup is keyed
+// on the literal's bytes, so the parent must already have recorded them (see
+// WordLiteralSection::addInput()).
+uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
+  const auto *mergedSec = cast<WordLiteralSection>(parent);
+  const uint8_t *literal = data.data() + off;
+  const uint8_t type = sectionType(flags);
+  if (type == S_4BYTE_LITERALS)
+    return mergedSec->getLiteral4Offset(literal);
+  if (type == S_8BYTE_LITERALS)
+    return mergedSec->getLiteral8Offset(literal);
+  if (type == S_16BYTE_LITERALS)
+    return mergedSec->getLiteral16Offset(literal);
+  llvm_unreachable("invalid literal section type");
+}
+
 bool macho::isCodeSection(const InputSection *isec) {
   uint32_t type = isec->flags & SECTION_TYPE;
   if (type != S_REGULAR && type != S_COALESCED)
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -22,6 +22,9 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <cstring>
+#include <unordered_map>
+
 namespace llvm {
 class DWARFUnit;
 } // namespace llvm
@@ -297,6 +299,7 @@
   // have a corresponding entry in the LazyPointerSection.
   bool addEntry(Symbol *);
   uint64_t getVA(uint32_t stubsIndex) const {
+    assert(isFinal || target->usesThunks());
     // ConcatOutputSection::finalize() can seek the address of a
     // stub before its address is assigned. Before __stubs is
     // finalized, return a contrived out-of-range address.
@@ -531,9 +534,89 @@
   llvm::StringTableBuilder builder;
 };
 
+/*
+ * This section contains deduplicated literal values. The 16-byte values are
+ * laid out first, followed by the 8- and then the 4-byte ones.
+ *
+ * Within each width, a value's position is the index it was given when
+ * addInput() first saw it; the get*Offset() methods convert that index into an
+ * offset from the start of this section.
+ */
+class WordLiteralSection : public SyntheticSection {
+public:
+  using UInt128 = std::pair<uint64_t, uint64_t>;
+  // I don't think the standard guarantees the size of a pair, so let's make
+  // sure it's exact -- its 16 bytes are read from and written to raw buffers.
+  static_assert(sizeof(UInt128) == 16, "");
+
+  WordLiteralSection();
+  // Records the literals of the given input section and becomes its parent.
+  void addInput(WordLiteralInputSection *);
+  // Writes all recorded literals to `buf`, in the layout described above.
+  void writeTo(uint8_t *buf) const override;
+
+  // Total size in bytes of all the deduplicated literals.
+  uint64_t getSize() const override {
+    return literal16Map.size() * 16 + literal8Map.size() * 8 +
+           literal4Map.size() * 4;
+  }
+
+  // The section is only needed if at least one literal was recorded.
+  bool isNeeded() const override {
+    return !literal16Map.empty() || !literal4Map.empty() ||
+           !literal8Map.empty();
+  }
+
+  // The get*Offset() methods below return the offset, from the start of this
+  // section, of the recorded literal whose bytes equal those at `buf`. `buf`
+  // need not be aligned. The value must have been recorded via addInput()
+  // beforehand, otherwise the map lookup fails.
+  uint64_t getLiteral16Offset(const uint8_t *buf) const {
+    return literal16Map.at(loadLiteral16(buf)) * 16;
+  }
+
+  uint64_t getLiteral8Offset(const uint8_t *buf) const {
+    return literal16Map.size() * 16 +
+           literal8Map.at(loadUnaligned<uint64_t>(buf)) * 8;
+  }
+
+  uint64_t getLiteral4Offset(const uint8_t *buf) const {
+    return literal16Map.size() * 16 + literal8Map.size() * 8 +
+           literal4Map.at(loadUnaligned<uint32_t>(buf)) * 4;
+  }
+
+private:
+  // Copies a T out of `buf` byte-for-byte (no endianness conversion). Unlike
+  // dereferencing a reinterpret_cast'ed pointer, this is well-defined even
+  // when `buf` is not suitably aligned for T.
+  template <class T> static T loadUnaligned(const uint8_t *buf) {
+    T value;
+    std::memcpy(&value, buf, sizeof(T));
+    return value;
+  }
+
+  // Loads 16 bytes as a UInt128: `first` from bytes 0-7, `second` from 8-15.
+  static UInt128 loadLiteral16(const uint8_t *buf) {
+    return {loadUnaligned<uint64_t>(buf), loadUnaligned<uint64_t>(buf + 8)};
+  }
+
+  template <class T> struct Hasher {
+    llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
+  };
+  // We're using unordered_map instead of DenseMap here because we need to
+  // support all possible integer values -- there are no suitable tombstone
+  // values for DenseMap.
+  // Each map goes from a literal's value to its index among the literals of
+  // the same width.
+  std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
+  std::unordered_map<uint64_t, uint64_t> literal8Map;
+  std::unordered_map<uint32_t, uint64_t> literal4Map;
+};
+
 struct InStruct {
   MachHeaderSection *header = nullptr;
   CStringSection *cStringSection = nullptr;
+  WordLiteralSection *wordLiteralSection = nullptr;
   RebaseSection *rebase = nullptr;
   BindingSection *binding = nullptr;
   WeakBindingSection *weakBinding = nullptr;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1103,6 +1103,78 @@
   }
 }
 
+// This section is actually emitted as __TEXT,__const by ld64, but clang may
+// emit input sections of that name, and LLD doesn't currently support mixing
+// synthetic and concat-type OutputSections. To work around this, I've given
+// our merged-literals section a different name.
+WordLiteralSection::WordLiteralSection()
+    : SyntheticSection(segment_names::text, section_names::literals) {}
+
+// Makes this section the parent of `isec` and records each of its literals. A
+// value seen for the first time is given the next free index among the
+// literals of its width; a value that is already present keeps its index.
+void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
+  isec->parent = this;
+  align = std::max(align, isec->align);
+  // We do all processing of the InputSection here, so it will be effectively
+  // finalized.
+  isec->isFinal = true;
+  // Literals are copied out with memcpy() instead of being read through a
+  // reinterpret_cast'ed pointer: nothing here guarantees that `buf` is
+  // suitably aligned for a typed load. Trailing bytes that don't make up a
+  // whole literal are ignored.
+  const uint8_t *buf = isec->data.data();
+  switch (sectionType(isec->flags)) {
+  case S_4BYTE_LITERALS: {
+    for (size_t i = 0, e = isec->data.size() / 4; i < e; ++i) {
+      uint32_t value;
+      memcpy(&value, buf + i * 4, sizeof(value));
+      literal4Map.emplace(value, literal4Map.size());
+    }
+    break;
+  }
+  case S_8BYTE_LITERALS: {
+    for (size_t i = 0, e = isec->data.size() / 8; i < e; ++i) {
+      uint64_t value;
+      memcpy(&value, buf + i * 8, sizeof(value));
+      literal8Map.emplace(value, literal8Map.size());
+    }
+    break;
+  }
+  case S_16BYTE_LITERALS: {
+    for (size_t i = 0, e = isec->data.size() / 16; i < e; ++i) {
+      // Same bytes as a UInt128 laid over the input: `first` comes from bytes
+      // 0-7 of the literal and `second` from bytes 8-15.
+      uint64_t halves[2];
+      memcpy(halves, buf + i * 16, sizeof(halves));
+      literal16Map.emplace(UInt128(halves[0], halves[1]),
+                           literal16Map.size());
+    }
+    break;
+  }
+  default:
+    llvm_unreachable("invalid literal section type");
+  }
+}
+
+// Writes the 16-byte literals, then the 8-byte ones, then the 4-byte ones,
+// each at the slot given by the index stored in its map entry.
+void WordLiteralSection::writeTo(uint8_t *buf) const {
+  // Note that we don't attempt to do any endianness conversion in addInput(),
+  // so we don't do it here either -- just write out the original value,
+  // byte-for-byte.
+  for (const auto &p : literal16Map)
+    memcpy(buf + p.second * 16, &p.first, 16);
+  buf += literal16Map.size() * 16;
+
+  for (const auto &p : literal8Map)
+    memcpy(buf + p.second * 8, &p.first, 8);
+  buf += literal8Map.size() * 8;
+
+  for (const auto &p : literal4Map)
+    memcpy(buf + p.second * 4, &p.first, 4);
+}
+
 void macho::createSyntheticSymbols() {
   auto addHeaderSymbol = [](const char *name) {
     symtab->addSynthetic(name, in.header->isec, /*value=*/0,
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -828,19 +828,24 @@
     InputSection *isec = p.value();
     if (isec->shouldOmitFromOutput())
       continue;
+    OutputSection *osec;
     if (auto *concatIsec = dyn_cast<ConcatInputSection>(isec)) {
       NamePair names = maybeRenameSection({isec->segname, isec->name});
-      ConcatOutputSection *&osec = concatOutputSections[names];
-      if (osec == nullptr) {
-        osec = make<ConcatOutputSection>(names.second);
-        osec->inputOrder = p.index();
-      }
-      osec->addInput(concatIsec);
+      ConcatOutputSection *&concatOsec = concatOutputSections[names];
+      if (concatOsec == nullptr)
+        concatOsec = make<ConcatOutputSection>(names.second);
+      concatOsec->addInput(concatIsec);
+      osec = concatOsec;
     } else if (auto *cStringIsec = dyn_cast<CStringInputSection>(isec)) {
-      if (in.cStringSection->inputs.empty())
-        in.cStringSection->inputOrder = p.index();
       in.cStringSection->addInput(cStringIsec);
+      osec = in.cStringSection;
+    } else if (auto *litIsec = dyn_cast<WordLiteralInputSection>(isec)) {
+      in.wordLiteralSection->addInput(litIsec);
+      osec = in.wordLiteralSection;
+    } else {
+      llvm_unreachable("unhandled InputSection type");
     }
+    osec->inputOrder = std::min(osec->inputOrder, static_cast<int>(p.index()));
   }
 
   // Once all the inputs are added, we can finalize the output section
@@ -1015,6 +1020,8 @@
 void macho::createSyntheticSections() {
   in.header = make<MachHeaderSection>();
   in.cStringSection = config->mergeLiterals ? make<CStringSection>() : nullptr;
+  in.wordLiteralSection =
+      config->mergeLiterals ? make<WordLiteralSection>() : nullptr;
   in.rebase = make<RebaseSection>();
   in.binding = make<BindingSection>();
   in.weakBinding = make<WeakBindingSection>();
diff --git a/lld/test/MachO/literal-merging.s b/lld/test/MachO/literal-merging.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/literal-merging.s
@@ -0,0 +1,110 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/qux.s -o %t/qux.o
+# RUN: %lld -dylib %t/test.o %t/qux.o -o %t/test
+# RUN: llvm-objdump --macho --section="__TEXT,__literals" --section="__DATA,ptrs" --syms %t/test | FileCheck %s
+# RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
+
+# CHECK:      Contents of (__TEXT,__literals) section
+# CHECK-NEXT: [[#%.16x,DEADBEEF16:]] ef be ad de ef be ad de ef be ad de ef be ad de
+# CHECK-NEXT: [[#%.16x,FEEDFACE16:]] ce fa ed fe ce fa ed fe ce fa ed fe ce fa ed fe
+# CHECK-NEXT: [[#%.16x,DEADBEEF8:]]  ef be ad de ef be ad de ce fa ed fe ce fa ed fe
+# CHECK-NEXT: [[#%.16x,DEADBEEF4:]]  ef be ad de ce fa ed fe
+# CHECK-NEXT: Contents of (__DATA,ptrs) section
+# CHECK-NEXT: 0000000000001000  0x[[#%x,DEADBEEF16]]
+# CHECK-NEXT: 0000000000001008  0x[[#%x,DEADBEEF16]]
+# CHECK-NEXT: 0000000000001010  0x[[#%x,FEEDFACE16]]
+# CHECK-NEXT: 0000000000001018  0x[[#%x,DEADBEEF16]]
+# CHECK-NEXT: 0000000000001020  0x[[#%x,DEADBEEF8]]
+# CHECK-NEXT: 0000000000001028  0x[[#%x,DEADBEEF8]]
+# CHECK-NEXT: 0000000000001030  0x[[#%x,DEADBEEF8 + 8]]
+# CHECK-NEXT: 0000000000001038  0x[[#%x,DEADBEEF8]]
+# CHECK-NEXT: 0000000000001040  0x[[#%x,DEADBEEF4]]
+# CHECK-NEXT: 0000000000001048  0x[[#%x,DEADBEEF4]]
+# CHECK-NEXT: 0000000000001050  0x[[#%x,DEADBEEF4 + 4]]
+# CHECK-NEXT: 0000000000001058  0x[[#%x,DEADBEEF4]]
+
+## Make sure the symbol addresses are correct too.
+# CHECK:     SYMBOL TABLE:
+# CHECK-DAG: [[#DEADBEEF16]] g     O __TEXT,__literals _qux16
+# CHECK-DAG: [[#DEADBEEF8]]  g     O __TEXT,__literals _qux8
+# CHECK-DAG: [[#DEADBEEF4]]  g     O __TEXT,__literals _qux4
+
+## Make sure we set the right alignment and flags.
+# HEADER:        Name: __literals
+# HEADER-NEXT:   Segment: __TEXT
+# HEADER-NEXT:   Address:
+# HEADER-NEXT:   Size:
+# HEADER-NEXT:   Offset:
+# HEADER-NEXT:   Alignment: 4
+# HEADER-NEXT:   RelocationOffset:
+# HEADER-NEXT:   RelocationCount: 0
+# HEADER-NEXT:   Type: Regular
+# HEADER-NEXT:   Attributes [ (0x0)
+# HEADER-NEXT:   ]
+# HEADER-NEXT:   Reserved1: 0x0
+# HEADER-NEXT:   Reserved2: 0x0
+# HEADER-NEXT:   Reserved3: 0x0
+
+#--- test.s
+.literal4
+.p2align 2
+L._foo4:
+  .long 0xdeadbeef
+L._bar4:
+  .long 0xdeadbeef
+L._baz4:
+  .long 0xfeedface
+
+.literal8
+L._foo8:
+  .quad 0xdeadbeefdeadbeef
+L._bar8:
+  .quad 0xdeadbeefdeadbeef
+L._baz8:
+  .quad 0xfeedfacefeedface
+
+.literal16
+L._foo16:
+  .quad 0xdeadbeefdeadbeef
+  .quad 0xdeadbeefdeadbeef
+L._bar16:
+  .quad 0xdeadbeefdeadbeef
+  .quad 0xdeadbeefdeadbeef
+L._baz16:
+  .quad 0xfeedfacefeedface
+  .quad 0xfeedfacefeedface
+
+.section __DATA,ptrs,literal_pointers
+.quad L._foo16
+.quad L._bar16
+.quad L._baz16
+.quad _qux16
+
+.quad L._foo8
+.quad L._bar8
+.quad L._baz8
+.quad _qux8
+
+.quad L._foo4
+.quad L._bar4
+.quad L._baz4
+.quad _qux4
+
+#--- qux.s
+.globl _qux4, _qux8, _qux16
+
+.literal4
+.p2align 2
+_qux4:
+  .long 0xdeadbeef
+
+.literal8
+_qux8:
+  .quad 0xdeadbeefdeadbeef
+
+.literal16
+_qux16:
+  .quad 0xdeadbeefdeadbeef
+  .quad 0xdeadbeefdeadbeef
diff --git a/lld/test/MachO/mattrs.ll b/lld/test/MachO/mattrs.ll
--- a/lld/test/MachO/mattrs.ll
+++ b/lld/test/MachO/mattrs.ll
@@ -3,7 +3,7 @@
 
 ;; Verify that LTO behavior can be tweaked using -mattr.
 
-; RUN: %lld -mcpu haswell -mllvm -mattr=+fma %t.o -o %t.dylib -dylib
+; RUN: %lld -save-temps -mcpu haswell -mllvm -mattr=+fma %t.o -o %t.dylib -dylib
 ; RUN: llvm-objdump -d --section="__text" --no-leading-addr --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=FMA
 
 ; RUN: %lld -mcpu haswell -mllvm -mattr=-fma %t.o -o %t.dylib -dylib
@@ -11,14 +11,14 @@
 
 ; FMA:      <_foo>:
 ; FMA-NEXT: vrcpss       %xmm0, %xmm0, %xmm1
-; FMA-NEXT: vfmsub213ss  7(%rip), %xmm1, %xmm0
+; FMA-NEXT: vfmsub213ss  [[#]](%rip), %xmm1, %xmm0
 ; FMA-NEXT: vfnmadd132ss %xmm1, %xmm1, %xmm0
 ; FMA-NEXT: retq
 
 ; NO-FMA:      <_foo>:
 ; NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
 ; NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; NO-FMA-NEXT: vmovss 16(%rip), %xmm2
+; NO-FMA-NEXT: vmovss [[#]](%rip), %xmm2
 ; NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
 ; NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0