Skip to content

Commit 31777ce

Browse files
committed Mar 8, 2015
PECOFF: Parallelize base relocation creation.
If an output is large, its base relocation section can also be large. For example, chrome.dll is almost 300 MB and has about 9 million base relocations; creating the section took 1.5 seconds on my machine. This patch changes how the section is created so that we can use parallel_sort to group base relocations by their high bits (that is, by page address). This change makes the linker almost 4% faster for the above test case on my machine. If I replace parallel_sort with std::sort, performance remains the same, so single-threaded performance should remain the same. This patch makes no functional change; the output should be identical to before. llvm-svn: 231585
1 parent 8ade983 commit 31777ce

File tree

1 file changed

+31
-29
lines changed

1 file changed

+31
-29
lines changed
 

‎lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp

+31-29
Original file line number | Diff line number | Diff line change
@@ -355,8 +355,8 @@ class BaseRelocChunk : public SectionChunk {
355355
typedef std::vector<std::unique_ptr<Chunk> > ChunkVectorT;
356356

357357
public:
358-
typedef std::pair<uint16_t, llvm::COFF::BaseRelocationType> BaseRelocation;
359-
typedef std::vector<BaseRelocation> BaseRelocations;
358+
typedef std::vector<std::pair<uint16_t, llvm::COFF::BaseRelocationType>>
359+
BaseRelocations;
360360
typedef std::map<uint64_t, BaseRelocations> RelocationBlocks;
361361

362362
BaseRelocChunk(ChunkVectorT &chunks, const PECOFFLinkingContext &ctx)
@@ -382,13 +382,11 @@ class BaseRelocChunk : public SectionChunk {
382382
// at an address different from its preferred one.
383383
AtomChunk::BaseRelocationList listRelocSites(ChunkVectorT &chunks) const;
384384

385-
// Divide the given RVAs into blocks.
386-
RelocationBlocks
387-
groupByPage(const AtomChunk::BaseRelocationList &relocSites) const;
388-
389385
// Create the content of a relocation block.
390386
std::vector<uint8_t>
391-
createBaseRelocBlock(uint64_t pageAddr, const BaseRelocations &relocs) const;
387+
createBaseRelocBlock(uint64_t pageAddr,
388+
const AtomChunk::BaseRelocation *begin,
389+
const AtomChunk::BaseRelocation *end) const;
392390

393391
const PECOFFLinkingContext &_ctx;
394392
std::vector<uint8_t> _contents;
@@ -965,11 +963,24 @@ std::vector<uint8_t>
965963
BaseRelocChunk::createContents(ChunkVectorT &chunks) const {
966964
std::vector<uint8_t> contents;
967965
AtomChunk::BaseRelocationList relocSites = listRelocSites(chunks);
968-
RelocationBlocks blocks = groupByPage(relocSites);
969-
for (auto &i : blocks) {
970-
uint64_t pageAddr = i.first;
971-
const BaseRelocations &relocs = i.second;
972-
std::vector<uint8_t> block = createBaseRelocBlock(pageAddr, relocs);
966+
967+
uint64_t mask = _ctx.getPageSize() - 1;
968+
parallel_sort(relocSites.begin(), relocSites.end(),
969+
[&](const AtomChunk::BaseRelocation &a,
970+
const AtomChunk::BaseRelocation &b) {
971+
return (a.first & ~mask) < (b.first & ~mask);
972+
});
973+
974+
// Base relocations for the same memory page are grouped together
975+
// and passed to createBaseRelocBlock.
976+
for (size_t i = 0, e = relocSites.size(); i < e; ++i) {
977+
const AtomChunk::BaseRelocation *begin = &relocSites[i];
978+
uint64_t pageAddr = (begin->first & ~mask);
979+
for (++i; i < e; ++i)
980+
if ((relocSites[i].first & ~mask) != pageAddr)
981+
break;
982+
const AtomChunk::BaseRelocation *end = &relocSites[i];
983+
std::vector<uint8_t> block = createBaseRelocBlock(pageAddr, begin, end);
973984
contents.insert(contents.end(), block.begin(), block.end());
974985
}
975986
return contents;
@@ -986,25 +997,16 @@ BaseRelocChunk::listRelocSites(ChunkVectorT &chunks) const {
986997
return ret;
987998
}
988999

989-
// Divide the given RVAs into blocks.
990-
BaseRelocChunk::RelocationBlocks BaseRelocChunk::groupByPage(
991-
const AtomChunk::BaseRelocationList &relocSites) const {
992-
RelocationBlocks blocks;
993-
uint64_t mask = _ctx.getPageSize() - 1;
994-
for (const auto &reloc : relocSites)
995-
blocks[reloc.first & ~mask].push_back(
996-
std::make_pair(reloc.first & mask, reloc.second));
997-
return blocks;
998-
}
999-
10001000
// Create the content of a relocation block.
10011001
std::vector<uint8_t>
1002-
BaseRelocChunk::createBaseRelocBlock(uint64_t pageAddr,
1003-
const BaseRelocations &relocs) const {
1002+
BaseRelocChunk::createBaseRelocBlock(
1003+
uint64_t pageAddr,
1004+
const AtomChunk::BaseRelocation *begin,
1005+
const AtomChunk::BaseRelocation *end) const {
10041006
// Relocation blocks should be padded with IMAGE_REL_I386_ABSOLUTE to be
10051007
// aligned to a DWORD size boundary.
10061008
uint32_t size = llvm::RoundUpToAlignment(
1007-
sizeof(ulittle32_t) * 2 + sizeof(ulittle16_t) * relocs.size(),
1009+
sizeof(ulittle32_t) * 2 + sizeof(ulittle16_t) * (end - begin),
10081010
sizeof(ulittle32_t));
10091011
std::vector<uint8_t> contents(size);
10101012
uint8_t *ptr = &contents[0];
@@ -1018,9 +1020,9 @@ BaseRelocChunk::createBaseRelocBlock(uint64_t pageAddr,
10181020
write32le(ptr, size);
10191021
ptr += sizeof(ulittle32_t);
10201022

1021-
for (const auto &reloc : relocs) {
1022-
assert(reloc.first < _ctx.getPageSize());
1023-
write16le(ptr, (reloc.second << 12) | reloc.first);
1023+
uint64_t mask = _ctx.getPageSize() - 1;
1024+
for (const AtomChunk::BaseRelocation *i = begin; i < end; ++i) {
1025+
write16le(ptr, (i->second << 12) | (i->first & mask));
10241026
ptr += sizeof(ulittle16_t);
10251027
}
10261028
return contents;

0 commit comments

Comments
 (0)
Please sign in to comment.