diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLD_MACHO_MERGED_OUTPUT_SECTION_H
-#define LLD_MACHO_MERGED_OUTPUT_SECTION_H
+#ifndef LLD_MACHO_CONCAT_OUTPUT_SECTION_H
+#define LLD_MACHO_CONCAT_OUTPUT_SECTION_H
 
 #include "InputSection.h"
 #include "OutputSection.h"
@@ -24,7 +24,7 @@
 // files that are labeled with the same segment and section name. This class
 // contains all such sections and writes the data from each section sequentially
 // in the final binary.
-class ConcatOutputSection final : public OutputSection {
+class ConcatOutputSection : public OutputSection {
 public:
   explicit ConcatOutputSection(StringRef name)
       : OutputSection(ConcatKind, name) {}
@@ -37,27 +37,46 @@
   uint64_t getSize() const override { return size; }
   uint64_t getFileSize() const override { return fileSize; }
 
-  void addInput(ConcatInputSection *input);
-  void finalize() override;
-  bool needsThunks() const;
-  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  // Assign values to InputSection::outSecOff. In contrast to TextOutputSection,
+  // which does this in its implementation of `finalize()`, we can do this
+  // without `finalize()`'s sequential guarantees detailed in the block comment
+  // of `OutputSection::finalize()`.
+  virtual void finalizeContents();
 
+  void addInput(ConcatInputSection *input);
   void writeTo(uint8_t *buf) const override;
 
-  std::vector<ConcatInputSection *> inputs;
-  std::vector<ConcatInputSection *> thunks;
-
   static bool classof(const OutputSection *sec) {
     return sec->kind() == ConcatKind;
   }
 
   static ConcatOutputSection *getOrCreateForInput(const InputSection *);
 
-private:
-  void finalizeFlags(InputSection *input);
+  std::vector<ConcatInputSection *> inputs;
 
+protected:
   size_t size = 0;
   uint64_t fileSize = 0;
+  void finalizeOne(ConcatInputSection *);
+
+private:
+  void finalizeFlags(InputSection *input);
+};
+
+// ConcatOutputSections that contain code (text) require special handling to
+// support thunk insertion.
+class TextOutputSection : public ConcatOutputSection {
+public:
+  explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
+  void finalizeContents() override {}
+  void finalize() override;
+  bool needsThunks() const;
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+
+  std::vector<ConcatInputSection *> thunks;
 };
 
 // We maintain one ThunkInfo per real function.
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -121,7 +121,7 @@
 // instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
 // thunks for programs so large that branch source & destination addresses
 // might differ more than the range of branch instruction(s).
-bool ConcatOutputSection::needsThunks() const {
+bool TextOutputSection::needsThunks() const {
   if (!target->usesThunks())
     return false;
   uint64_t isecAddr = addr;
@@ -138,7 +138,7 @@
       auto *sym = r.referent.get<Symbol *>();
       // Pre-populate the thunkMap and memoize call site counts for every
       // InputSection and ThunkInfo. We do this for the benefit of
-      // ConcatOutputSection::estimateStubsInRangeVA()
+      // estimateStubsInRangeVA().
       ThunkInfo &thunkInfo = thunkMap[sym];
       // Knowing ThunkInfo call site count will help us know whether or not we
       // might need to create more for this referent at the time we are
@@ -154,7 +154,7 @@
 // Since __stubs is placed after __text, we must estimate the address
 // beyond which stubs are within range of a simple forward branch.
 // This is called exactly once, when the last input section has been finalized.
-uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
+uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   // Tally the functions which still have call sites remaining to process,
   // which yields the maximum number of thunks we might yet place.
   size_t maxPotentialThunks = 0;
@@ -193,23 +193,24 @@
   return stubsInRangeVA;
 }
 
-void ConcatOutputSection::finalize() {
-  uint64_t isecAddr = addr;
-  uint64_t isecFileOff = fileOff;
-  auto finalizeOne = [&](ConcatInputSection *isec) {
-    isecAddr = alignTo(isecAddr, isec->align);
-    isecFileOff = alignTo(isecFileOff, isec->align);
-    isec->outSecOff = isecAddr - addr;
-    isec->isFinal = true;
-    isecAddr += isec->getSize();
-    isecFileOff += isec->getFileSize();
-  };
+void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
+  size = alignTo(size, isec->align);
+  fileSize = alignTo(fileSize, isec->align);
+  isec->outSecOff = size;
+  isec->isFinal = true;
+  size += isec->getSize();
+  fileSize += isec->getFileSize();
+}
+
+void ConcatOutputSection::finalizeContents() {
+  for (ConcatInputSection *isec : inputs)
+    finalizeOne(isec);
+}
 
+void TextOutputSection::finalize() {
   if (!needsThunks()) {
     for (ConcatInputSection *isec : inputs)
       finalizeOne(isec);
-    size = isecAddr - addr;
-    fileSize = isecFileOff - fileOff;
     return;
   }
 
@@ -225,7 +226,7 @@
   // Walk all sections in order. Finalize all sections that are less than
   // forwardBranchRange in front of it.
   // isecVA is the address of the current section.
-  // isecAddr is the start address of the first non-finalized section.
+  // addr + size is the start address of the first non-finalized section.
 
   // inputs[finalIdx] is for finalization (address-assignment)
   size_t finalIdx = 0;
@@ -246,7 +247,7 @@
     // from the current position to the position where the thunks are inserted
     // grows. So leave room for a bunch of thunks.
     unsigned slop = 256 * thunkSize;
-    while (finalIdx < endIdx && isecAddr + inputs[finalIdx]->getSize() <
+    while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() <
                                     isecVA + forwardBranchRange - slop)
       finalizeOne(inputs[finalIdx++]);
 
@@ -307,7 +308,7 @@
         }
       }
       // ... otherwise, create a new thunk.
-      if (isecAddr > highVA) {
+      if (addr + size > highVA) {
         // There were too many consecutive branch instructions for `slop`
         // above. If you hit this: For the current algorithm, just bumping up
         // slop above and trying again is probably simplest. (See also PR51578
@@ -342,12 +343,11 @@
       thunkInfo.sym->used = true;
       target->populateThunk(thunkInfo.isec, funcSym);
       finalizeOne(thunkInfo.isec);
+      fprintf(stderr, "%llx\n", thunkInfo.isec->outSecOff);
       thunks.push_back(thunkInfo.isec);
       ++thunkCount;
     }
   }
-  size = isecAddr - addr;
-  fileSize = isecFileOff - fileOff;
 
   log("thunks for " + parent->name + "," + name +
       ": funcs = " + std::to_string(thunkMap.size()) +
@@ -358,6 +358,11 @@
 }
 
 void ConcatOutputSection::writeTo(uint8_t *buf) const {
+  for (ConcatInputSection *isec : inputs)
+    isec->writeTo(buf + isec->outSecOff);
+}
+
+void TextOutputSection::writeTo(uint8_t *buf) const {
   // Merge input sections from thunk & ordinary vectors
   size_t i = 0, ie = inputs.size();
   size_t t = 0, te = thunks.size();
@@ -402,8 +407,14 @@
 ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
   NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
   ConcatOutputSection *&osec = concatOutputSections[names];
-  if (!osec)
-    osec = make<ConcatOutputSection>(names.second);
+  if (!osec) {
+    if (isec->getSegName() == segment_names::text &&
+        isec->getName() != section_names::gccExceptTab &&
+        isec->getName() != section_names::ehFrame)
+      osec = make<TextOutputSection>(names.second);
+    else
+      osec = make<ConcatOutputSection>(names.second);
+  }
   return osec;
 }
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -972,6 +972,21 @@
 void Writer::finalizeAddresses() {
   TimeTraceScope timeScope("Finalize addresses");
   uint64_t pageSize = target->getPageSize();
+
+  // We could parallelize this loop, but local benchmarking indicates it is
+  // faster to do it all in the main thread.
+  for (OutputSegment *seg : outputSegments) {
+    if (seg == linkEditSegment)
+      continue;
+    for (OutputSection *osec : seg->getSections()) {
+      if (!osec->isNeeded())
+        continue;
+      // Other kinds of OutputSections have already been finalized.
+      if (auto concatOsec = dyn_cast<ConcatOutputSection>(osec))
+          concatOsec->finalizeContents();
+    }
+  }
+
   // Ensure that segments (and the sections they contain) are allocated
   // addresses in ascending order, which dyld requires.
   //