diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -24,7 +24,7 @@
 // files that are labeled with the same segment and section name. This class
 // contains all such sections and writes the data from each section sequentially
 // in the final binary.
-class ConcatOutputSection final : public OutputSection {
+class ConcatOutputSection : public OutputSection {
 public:
   explicit ConcatOutputSection(StringRef name)
       : OutputSection(ConcatKind, name) {}
@@ -38,26 +38,39 @@
   uint64_t getFileSize() const override { return fileSize; }
 
   void addInput(ConcatInputSection *input);
-  void finalize() override;
-  bool needsThunks() const;
-  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
-
+  virtual void finalizeContents();
   void writeTo(uint8_t *buf) const override;
 
-  std::vector<ConcatInputSection *> inputs;
-  std::vector<ConcatInputSection *> thunks;
-
   static bool classof(const OutputSection *sec) {
     return sec->kind() == ConcatKind;
   }
 
   static ConcatOutputSection *getOrCreateForInput(const InputSection *);
 
-private:
-  void finalizeFlags(InputSection *input);
+  std::vector<ConcatInputSection *> inputs;
 
+protected:
   size_t size = 0;
   uint64_t fileSize = 0;
+
+private:
+  void finalizeFlags(InputSection *input);
+};
+
+// ConcatOutputSections that contain code (text) require special handling to
+// support thunk insertion. NOTE(review): this class inherits classof(), so
+// isa<>/dyn_cast<> cannot tell it apart from a plain ConcatOutputSection.
+class TextOutputSection final : public ConcatOutputSection {
+public:
+  explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
+  void finalizeContents() override {} // No-op: layout happens in finalize().
+  void finalize() override;
+  bool needsThunks() const;
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  std::vector<ConcatInputSection *> thunks;
+};
 
 // We maintain one ThunkInfo per real function.
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -121,7 +121,7 @@
 // instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
 // thunks for programs so large that branch source & destination addresses
 // might differ more than the range of branch instruction(s).
-bool ConcatOutputSection::needsThunks() const {
+bool TextOutputSection::needsThunks() const {
   if (!target->usesThunks())
     return false;
   uint64_t isecAddr = addr;
@@ -138,7 +138,7 @@
       auto *sym = r.referent.get<Symbol *>();
       // Pre-populate the thunkMap and memoize call site counts for every
       // InputSection and ThunkInfo. We do this for the benefit of
-      // ConcatOutputSection::estimateStubsInRangeVA()
+      // estimateStubsInRangeVA().
       ThunkInfo &thunkInfo = thunkMap[sym];
       // Knowing ThunkInfo call site count will help us know whether or not we
       // might need to create more for this referent at the time we are
@@ -154,7 +154,7 @@
 // Since __stubs is placed after __text, we must estimate the address
 // beyond which stubs are within range of a simple forward branch.
 // This is called exactly once, when the last input section has been finalized.
-uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
+uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   // Tally the functions which still have call sites remaining to process,
   // which yields the maximum number of thunks we might yet place.
   size_t maxPotentialThunks = 0;
@@ -193,7 +193,25 @@
   return stubsInRangeVA;
 }
 
-void ConcatOutputSection::finalize() {
+// Lays out the input sections back to back, relative to the start of this
+// output section: records each one's aligned offset, marks it final, and
+// accumulates the running totals that end up in size and fileSize.
+void ConcatOutputSection::finalizeContents() {
+  uint64_t vmOffset = 0;
+  uint64_t fileOffset = 0;
+  for (ConcatInputSection *isec : inputs) {
+    vmOffset = alignTo(vmOffset, isec->align);
+    fileOffset = alignTo(fileOffset, isec->align);
+    isec->outSecOff = vmOffset;
+    isec->isFinal = true;
+    vmOffset += isec->getSize();
+    fileOffset += isec->getFileSize();
+  }
+  size = vmOffset;
+  fileSize = fileOffset;
+}
+
+void TextOutputSection::finalize() {
   uint64_t isecAddr = addr;
   uint64_t isecFileOff = fileOff;
   auto finalizeOne = [&](ConcatInputSection *isec) {
@@ -358,6 +376,11 @@
 }
 
 void ConcatOutputSection::writeTo(uint8_t *buf) const {
+  for (ConcatInputSection *isec : inputs)
+    isec->writeTo(buf + isec->outSecOff); // Offset set by finalizeContents().
+}
+
+void TextOutputSection::writeTo(uint8_t *buf) const {
   // Merge input sections from thunk & ordinary vectors
   size_t i = 0, ie = inputs.size();
   size_t t = 0, te = thunks.size();
@@ -402,8 +425,14 @@
 ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
   NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
   ConcatOutputSection *&osec = concatOutputSections[names];
-  if (!osec)
-    osec = make<ConcatOutputSection>(names.second);
+  if (!osec) {
+    if (isec->getSegName() == segment_names::text &&
+        isec->getName() != section_names::gccExceptTab &&
+        isec->getName() != section_names::ehFrame)
+      osec = make<TextOutputSection>(names.second);
+    else
+      osec = make<ConcatOutputSection>(names.second);
+  }
   return osec;
 }
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -972,6 +972,21 @@
 void Writer::finalizeAddresses() {
   TimeTraceScope timeScope("Finalize addresses");
   uint64_t pageSize = target->getPageSize();
+
+  // Lay out ConcatOutputSection contents up front. This loop could be
+  // parallelized, but it seems cheaper to do it all in the main thread.
+  for (OutputSegment *seg : outputSegments) {
+    if (seg == linkEditSegment)
+      continue;
+    for (OutputSection *osec : seg->getSections()) {
+      if (!osec->isNeeded())
+        continue;
+      // Other kinds of OutputSections have already been finalized.
+      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec))
+        concatOsec->finalizeContents();
+    }
+  }
+
   // Ensure that segments (and the sections they contain) are allocated
   // addresses in ascending order, which dyld requires.
   //