PECOFF: Optimize the writer using parallel_for.

rui314 · rui314 · commit 93ceadfb7bf9 · 2015-03-06T06:53:13.000Z
Previously applying 1 million relocations took about 2 seconds on my
Xeon 2.4GHz 8 core workstation. After this patch, it takes about 300
milliseconds. As a result, time to link chrome.dll becomes 23 seconds
to 21 seconds.

llvm-svn: 231454
diff --git a/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp b/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp
@@ -602,8 +602,9 @@ void AtomChunk::applyRelocationsARM(uint8_t *Buffer,
                                     std::vector<uint64_t> &SectionRVA,
                                     uint64_t ImageBase) {
   Buffer = Buffer + _fileOffset;
-  for (const auto *Layout : _atomLayouts) {
-    const DefinedAtom *Atom = cast<DefinedAtom>(Layout->_atom);
+  parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(),
+                    [&](const AtomLayout *layout) {
+    const DefinedAtom *Atom = cast<DefinedAtom>(layout->_atom);
     for (const Reference *R : *Atom) {
       if (R->kindNamespace() != Reference::KindNamespace::COFF)
         continue;
@@ -614,7 +615,7 @@ void AtomChunk::applyRelocationsARM(uint8_t *Buffer,
                           Target->permissions() == DefinedAtom::permRWX;
 
       const auto AtomOffset = R->offsetInAtom();
-      const auto FileOffset = Layout->_fileOffset;
+      const auto FileOffset = layout->_fileOffset;
       const auto TargetAddr = AtomRVA[R->target()] | (AssumeTHUMBCode ? 1 : 0);
       auto RelocSite16 =
           reinterpret_cast<ulittle16_t *>(Buffer + FileOffset + AtomOffset);
@@ -645,15 +646,16 @@ void AtomChunk::applyRelocationsARM(uint8_t *Buffer,
         break;
       }
     }
-  }
+  });
 }
 
 void AtomChunk::applyRelocationsX86(uint8_t *buffer,
                                     std::map<const Atom *, uint64_t> &atomRva,
                                     std::vector<uint64_t> &sectionRva,
                                     uint64_t imageBaseAddress) {
   buffer += _fileOffset;
-  for (const auto *layout : _atomLayouts) {
+  parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(),
+                    [&](const AtomLayout *layout) {
     const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom);
     for (const Reference *ref : *atom) {
       // Skip if this reference is not for COFF relocation.
@@ -702,15 +704,16 @@ void AtomChunk::applyRelocationsX86(uint8_t *buffer,
         llvm::report_fatal_error("Unsupported relocation kind");
       }
     }
-  }
+  });
 }
 
 void AtomChunk::applyRelocationsX64(uint8_t *buffer,
                                     std::map<const Atom *, uint64_t> &atomRva,
                                     std::vector<uint64_t> &sectionRva,
                                     uint64_t imageBase) {
   buffer += _fileOffset;
-  for (const auto *layout : _atomLayouts) {
+  parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(),
+                    [&](const AtomLayout *layout) {
     const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom);
     for (const Reference *ref : *atom) {
       if (ref->kindNamespace() != Reference::KindNamespace::COFF)
@@ -762,7 +765,7 @@ void AtomChunk::applyRelocationsX64(uint8_t *buffer,
         llvm::report_fatal_error("Unsupported relocation kind");
       }
     }
-  }
+  });
 }
 
 /// Print atom VAs. Used only for debugging.