Skip to content

Commit 552bcb8

Browse files
committedAug 19, 2019
Recommit "[llvm-objcopy][MachO] Support load commands used in executables/shared libraries"
Summary: This patch implements copying some load commands that appear in executables/shared libraries such as the indirect symbol table. I don't add tests intentionally because this patch is incomplete: we need a layout algorithm for executables/shared libraries. I'll submit it as a separate patch with tests. Reviewers: alexshap, rupprecht, jhenderson, compnerd Reviewed By: alexshap Subscribers: abrachet, mgorny, mgrang, MaskRay, mtrent, jakehehrlich, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63395 llvm-svn: 369298
1 parent 50affbe commit 552bcb8

9 files changed

+560
-210
lines changed
 

Diff for: ‎llvm/tools/llvm-objcopy/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ add_llvm_tool(llvm-objcopy
2626
MachO/MachOObjcopy.cpp
2727
MachO/MachOReader.cpp
2828
MachO/MachOWriter.cpp
29+
MachO/MachOLayoutBuilder.cpp
2930
MachO/Object.cpp
3031
DEPENDS
3132
ObjcopyOptsTableGen

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp

+322
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "MachOLayoutBuilder.h"
10+
#include "llvm/Support/Errc.h"
11+
#include "llvm/Support/ErrorHandling.h"
12+
13+
namespace llvm {
14+
namespace objcopy {
15+
namespace macho {
16+
17+
// Returns the total number of bytes occupied by all load commands, i.e. the
// value to store in the Mach-O header's sizeofcmds field.
uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
  uint32_t TotalSize = 0;
  for (const auto &LC : O.LoadCommands) {
    const auto Cmd = LC.MachOLoadCommand.load_command_data.cmd;

    // Segment commands are followed by one section header per section rather
    // than by an opaque payload, so they are sized separately.
    if (Cmd == MachO::LC_SEGMENT) {
      TotalSize += sizeof(MachO::segment_command) +
                   sizeof(MachO::section) * LC.Sections.size();
    } else if (Cmd == MachO::LC_SEGMENT_64) {
      TotalSize += sizeof(MachO::segment_command_64) +
                   sizeof(MachO::section_64) * LC.Sections.size();
    } else {
      // Every other known command: fixed-size struct plus trailing payload.
      // Commands not listed in MachO.def contribute nothing.
      switch (Cmd) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    TotalSize += sizeof(MachO::LCStruct) + LC.Payload.size();                  \
    break;
#include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
      }
    }
  }

  return TotalSize;
}
45+
46+
void MachOLayoutBuilder::constructStringTable() {
47+
for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
48+
StrTableBuilder.add(Sym->Name);
49+
StrTableBuilder.finalize();
50+
}
51+
52+
void MachOLayoutBuilder::updateSymbolIndexes() {
53+
uint32_t Index = 0;
54+
for (auto &Symbol : O.SymTable.Symbols)
55+
Symbol->Index = Index++;
56+
}
57+
58+
// Updates the index and the number of local/external/undefined symbols.
59+
void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
60+
assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
61+
// Make sure that nlist entries in the symbol table are sorted by the those
62+
// types. The order is: local < defined external < undefined external.
63+
assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(),
64+
[](const std::unique_ptr<SymbolEntry> &A,
65+
const std::unique_ptr<SymbolEntry> &B) {
66+
return (A->isLocalSymbol() && !B->isLocalSymbol()) ||
67+
(!A->isUndefinedSymbol() &&
68+
B->isUndefinedSymbol());
69+
}) &&
70+
"Symbols are not sorted by their types.");
71+
72+
uint32_t NumLocalSymbols = 0;
73+
auto Iter = O.SymTable.Symbols.begin();
74+
auto End = O.SymTable.Symbols.end();
75+
for (; Iter != End; ++Iter) {
76+
if ((*Iter)->isExternalSymbol())
77+
break;
78+
79+
++NumLocalSymbols;
80+
}
81+
82+
uint32_t NumExtDefSymbols = 0;
83+
for (; Iter != End; ++Iter) {
84+
if ((*Iter)->isUndefinedSymbol())
85+
break;
86+
87+
++NumExtDefSymbols;
88+
}
89+
90+
MLC.dysymtab_command_data.ilocalsym = 0;
91+
MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
92+
MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
93+
MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
94+
MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
95+
MLC.dysymtab_command_data.nundefsym =
96+
O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
97+
}
98+
99+
// Recomputes and updates offset and size fields in load commands and sections
100+
// since they could be modified.
101+
uint64_t MachOLayoutBuilder::layoutSegments() {
102+
auto HeaderSize =
103+
Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
104+
auto Offset = HeaderSize + O.Header.SizeOfCmds;
105+
106+
// Lay out sections.
107+
for (auto &LC : O.LoadCommands) {
108+
uint64_t FileOff = Offset;
109+
auto &MLC = LC.MachOLoadCommand;
110+
StringRef Segname;
111+
switch (MLC.load_command_data.cmd) {
112+
case MachO::LC_SEGMENT:
113+
Segname = StringRef(MLC.segment_command_data.segname,
114+
strnlen(MLC.segment_command_data.segname,
115+
sizeof(MLC.segment_command_data.segname)));
116+
break;
117+
case MachO::LC_SEGMENT_64:
118+
Segname = StringRef(MLC.segment_command_64_data.segname,
119+
strnlen(MLC.segment_command_64_data.segname,
120+
sizeof(MLC.segment_command_64_data.segname)));
121+
break;
122+
default:
123+
continue;
124+
}
125+
126+
if (Segname == "__LINKEDIT") {
127+
// We update the __LINKEDIT segment later (in layoutTail).
128+
assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
129+
LinkEditLoadCommand = &MLC;
130+
continue;
131+
}
132+
133+
// Update file offsets and sizes of sections.
134+
uint64_t VMSize = 0;
135+
uint64_t FileOffsetInSegment = 0;
136+
for (auto &Sec : LC.Sections) {
137+
if (!Sec.isVirtualSection()) {
138+
auto FilePaddingSize =
139+
OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
140+
Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
141+
Sec.Size = Sec.Content.size();
142+
FileOffsetInSegment += FilePaddingSize + Sec.Size;
143+
}
144+
145+
VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
146+
}
147+
148+
// TODO: Handle the __PAGEZERO segment.
149+
switch (MLC.load_command_data.cmd) {
150+
case MachO::LC_SEGMENT:
151+
MLC.segment_command_data.cmdsize =
152+
sizeof(MachO::segment_command) +
153+
sizeof(MachO::section) * LC.Sections.size();
154+
MLC.segment_command_data.nsects = LC.Sections.size();
155+
MLC.segment_command_data.fileoff = FileOff;
156+
MLC.segment_command_data.vmsize = VMSize;
157+
MLC.segment_command_data.filesize = FileOffsetInSegment;
158+
break;
159+
case MachO::LC_SEGMENT_64:
160+
MLC.segment_command_64_data.cmdsize =
161+
sizeof(MachO::segment_command_64) +
162+
sizeof(MachO::section_64) * LC.Sections.size();
163+
MLC.segment_command_64_data.nsects = LC.Sections.size();
164+
MLC.segment_command_64_data.fileoff = FileOff;
165+
MLC.segment_command_64_data.vmsize = VMSize;
166+
MLC.segment_command_64_data.filesize = FileOffsetInSegment;
167+
break;
168+
}
169+
170+
Offset += FileOffsetInSegment;
171+
}
172+
173+
return Offset;
174+
}
175+
176+
uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
177+
for (auto &LC : O.LoadCommands)
178+
for (auto &Sec : LC.Sections) {
179+
Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
180+
Sec.NReloc = Sec.Relocations.size();
181+
Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
182+
}
183+
184+
return Offset;
185+
}
186+
187+
// Lays out the contents of the __LINKEDIT segment starting at Offset and
// updates every load command that refers to them. Returns an error if a load
// command (or a LC_DYSYMTAB feature) that this builder does not know how to
// update is encountered.
Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
  // The LINKEDIT elements are placed back to back in this order:
  // rebase info, binding info, weak binding info, lazy binding info, export
  // trie, function starts, data-in-code, symbol table, indirect symbol table,
  // symbol table strings.
  const uint64_t NListSize =
      Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);

  // Hands out consecutive regions: returns the start of a region of the given
  // size and advances the running cursor past it.
  uint64_t Cursor = Offset;
  auto Reserve = [&Cursor](uint64_t Size) {
    const uint64_t Start = Cursor;
    Cursor += Size;
    return Start;
  };

  const uint64_t StartOfLinkEdit = Cursor;
  const uint64_t StartOfRebaseInfo = Reserve(O.Rebases.Opcodes.size());
  const uint64_t StartOfBindingInfo = Reserve(O.Binds.Opcodes.size());
  const uint64_t StartOfWeakBindingInfo = Reserve(O.WeakBinds.Opcodes.size());
  const uint64_t StartOfLazyBindingInfo = Reserve(O.LazyBinds.Opcodes.size());
  const uint64_t StartOfExportTrie = Reserve(O.Exports.Trie.size());
  const uint64_t StartOfFunctionStarts = Reserve(O.FunctionStarts.Data.size());
  const uint64_t StartOfDataInCode = Reserve(O.DataInCode.Data.size());
  const uint64_t StartOfSymbols =
      Reserve(NListSize * O.SymTable.Symbols.size());
  const uint64_t StartOfIndirectSymbols =
      Reserve(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
  const uint64_t StartOfSymbolStrings = Reserve(StrTableBuilder.getSize());
  const uint64_t LinkEditSize = Cursor - StartOfLinkEdit;

  // Now we have determined the layout of the contents of the __LINKEDIT
  // segment. Update its load command.
  if (LinkEditLoadCommand) {
    MachO::macho_load_command &LinkEdit = *LinkEditLoadCommand;
    const auto LinkEditCmd = LinkEdit.load_command_data.cmd;
    if (LinkEditCmd == MachO::LC_SEGMENT) {
      auto &Seg = LinkEdit.segment_command_data;
      Seg.cmdsize = sizeof(MachO::segment_command);
      Seg.fileoff = StartOfLinkEdit;
      Seg.vmsize = alignTo(LinkEditSize, PageSize);
      Seg.filesize = LinkEditSize;
    } else if (LinkEditCmd == MachO::LC_SEGMENT_64) {
      auto &Seg = LinkEdit.segment_command_64_data;
      Seg.cmdsize = sizeof(MachO::segment_command_64);
      Seg.fileoff = StartOfLinkEdit;
      Seg.vmsize = alignTo(LinkEditSize, PageSize);
      Seg.filesize = LinkEditSize;
    }
  }

  // An empty region is recorded with offset 0 instead of its nominal start.
  auto OffsetOrZero = [](bool IsEmpty, uint64_t Start) {
    return IsEmpty ? 0 : Start;
  };

  for (auto &LC : O.LoadCommands) {
    auto &MLC = LC.MachOLoadCommand;
    const auto Cmd = MLC.load_command_data.cmd;
    switch (Cmd) {
    case MachO::LC_SYMTAB: {
      auto &SymTab = MLC.symtab_command_data;
      SymTab.symoff = StartOfSymbols;
      SymTab.nsyms = O.SymTable.Symbols.size();
      SymTab.stroff = StartOfSymbolStrings;
      SymTab.strsize = StrTableBuilder.getSize();
      break;
    }
    case MachO::LC_DYSYMTAB: {
      auto &DySymTab = MLC.dysymtab_command_data;
      const bool HasUnsupportedTables =
          DySymTab.ntoc != 0 || DySymTab.nmodtab != 0 ||
          DySymTab.nextrefsyms != 0 || DySymTab.nlocrel != 0 ||
          DySymTab.nextrel != 0;
      if (HasUnsupportedTables)
        return createStringError(llvm::errc::not_supported,
                                 "shared library is not yet supported");

      // The indirect symbol fields are only rewritten when there is a table
      // to emit; otherwise they keep whatever values they already had.
      const auto &IndirectSymbols = O.IndirectSymTable.Symbols;
      if (!IndirectSymbols.empty()) {
        DySymTab.indirectsymoff = StartOfIndirectSymbols;
        DySymTab.nindirectsyms = IndirectSymbols.size();
      }

      updateDySymTab(MLC);
      break;
    }
    case MachO::LC_DATA_IN_CODE: {
      auto &LinkEditData = MLC.linkedit_data_command_data;
      LinkEditData.dataoff = StartOfDataInCode;
      LinkEditData.datasize = O.DataInCode.Data.size();
      break;
    }
    case MachO::LC_FUNCTION_STARTS: {
      auto &LinkEditData = MLC.linkedit_data_command_data;
      LinkEditData.dataoff = StartOfFunctionStarts;
      LinkEditData.datasize = O.FunctionStarts.Data.size();
      break;
    }
    case MachO::LC_DYLD_INFO:
    case MachO::LC_DYLD_INFO_ONLY: {
      auto &DyldInfo = MLC.dyld_info_command_data;
      DyldInfo.rebase_off =
          OffsetOrZero(O.Rebases.Opcodes.empty(), StartOfRebaseInfo);
      DyldInfo.rebase_size = O.Rebases.Opcodes.size();
      DyldInfo.bind_off =
          OffsetOrZero(O.Binds.Opcodes.empty(), StartOfBindingInfo);
      DyldInfo.bind_size = O.Binds.Opcodes.size();
      DyldInfo.weak_bind_off =
          OffsetOrZero(O.WeakBinds.Opcodes.empty(), StartOfWeakBindingInfo);
      DyldInfo.weak_bind_size = O.WeakBinds.Opcodes.size();
      DyldInfo.lazy_bind_off =
          OffsetOrZero(O.LazyBinds.Opcodes.empty(), StartOfLazyBindingInfo);
      DyldInfo.lazy_bind_size = O.LazyBinds.Opcodes.size();
      DyldInfo.export_off =
          OffsetOrZero(O.Exports.Trie.empty(), StartOfExportTrie);
      DyldInfo.export_size = O.Exports.Trie.size();
      break;
    }
    case MachO::LC_LOAD_DYLINKER:
    case MachO::LC_MAIN:
    case MachO::LC_RPATH:
    case MachO::LC_SEGMENT:
    case MachO::LC_SEGMENT_64:
    case MachO::LC_VERSION_MIN_MACOSX:
    case MachO::LC_BUILD_VERSION:
    case MachO::LC_ID_DYLIB:
    case MachO::LC_LOAD_DYLIB:
    case MachO::LC_UUID:
    case MachO::LC_SOURCE_VERSION:
      // Nothing to update.
      break;
    default:
      // Abort if it's unsupported in order to prevent corrupting the object.
      return createStringError(llvm::errc::not_supported,
                               "unsupported load command (cmd=0x%x)", Cmd);
    }
  }

  return Error::success();
}
309+
310+
// Entry point: refreshes the header's command count/size, rebuilds the string
// table and symbol indexes, then lays out segments, relocations and finally
// the __LINKEDIT contents, in that order.
Error MachOLayoutBuilder::layout() {
  O.Header.NCmds = O.LoadCommands.size();
  O.Header.SizeOfCmds = computeSizeOfCmds();

  constructStringTable();
  updateSymbolIndexes();

  const uint64_t EndOfSegments = layoutSegments();
  const uint64_t EndOfRelocations = layoutRelocations(EndOfSegments);
  return layoutTail(EndOfRelocations);
}
319+
320+
} // end namespace macho
321+
} // end namespace objcopy
322+
} // end namespace llvm

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
10+
#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
11+
12+
#include "MachOObjcopy.h"
13+
#include "Object.h"
14+
15+
namespace llvm {
16+
namespace objcopy {
17+
namespace macho {
18+
19+
class MachOLayoutBuilder {
20+
Object &O;
21+
bool Is64Bit;
22+
uint64_t PageSize;
23+
24+
// Points to the __LINKEDIT segment if it exists.
25+
MachO::macho_load_command *LinkEditLoadCommand = nullptr;
26+
StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
27+
28+
uint32_t computeSizeOfCmds() const;
29+
void constructStringTable();
30+
void updateSymbolIndexes();
31+
void updateDySymTab(MachO::macho_load_command &MLC);
32+
uint64_t layoutSegments();
33+
uint64_t layoutRelocations(uint64_t Offset);
34+
Error layoutTail(uint64_t Offset);
35+
36+
public:
37+
MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
38+
: O(O), Is64Bit(Is64Bit), PageSize(PageSize) {}
39+
40+
// Recomputes and updates fields in the given object such as file offsets.
41+
Error layout();
42+
43+
StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
44+
};
45+
46+
} // end namespace macho
47+
} // end namespace objcopy
48+
} // end namespace llvm
49+
50+
#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
5757
if (Error E = handleArgs(Config, *O))
5858
return createFileError(Config.InputFilename, std::move(E));
5959

60-
MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
60+
// TODO: Support 16KB pages which are employed in iOS arm64 binaries:
61+
// https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb
62+
const uint64_t PageSize = 4096;
63+
64+
MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
6165
if (auto E = Writer.finalize())
6266
return E;
6367
return Writer.write();

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOReader.cpp

+41
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Object &O) const {
129129
case MachO::LC_SYMTAB:
130130
O.SymTabCommandIndex = O.LoadCommands.size();
131131
break;
132+
case MachO::LC_DYSYMTAB:
133+
O.DySymTabCommandIndex = O.LoadCommands.size();
134+
break;
132135
case MachO::LC_DYLD_INFO:
133136
case MachO::LC_DYLD_INFO_ONLY:
134137
O.DyLdInfoCommandIndex = O.LoadCommands.size();
135138
break;
139+
case MachO::LC_DATA_IN_CODE:
140+
O.DataInCodeCommandIndex = O.LoadCommands.size();
141+
break;
142+
case MachO::LC_FUNCTION_STARTS:
143+
O.FunctionStartsCommandIndex = O.LoadCommands.size();
144+
break;
136145
}
137146
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
138147
case MachO::LCName: \
@@ -222,6 +231,35 @@ void MachOReader::readExportInfo(Object &O) const {
222231
O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
223232
}
224233

234+
// Captures the bytes referenced by the LC_DATA_IN_CODE load command, if the
// object has one, as a view into the input file's data.
void MachOReader::readDataInCodeData(Object &O) const {
  const auto &CommandIndex = O.DataInCodeCommandIndex;
  if (!CommandIndex)
    return;

  const MachO::linkedit_data_command &LinkEditData =
      O.LoadCommands[*CommandIndex].MachOLoadCommand.linkedit_data_command_data;
  const StringRef RawBytes =
      MachOObj.getData().substr(LinkEditData.dataoff, LinkEditData.datasize);

  O.DataInCode.Data = arrayRefFromStringRef(RawBytes);
}
244+
245+
// Captures the bytes referenced by the LC_FUNCTION_STARTS load command, if
// the object has one, as a view into the input file's data.
void MachOReader::readFunctionStartsData(Object &O) const {
  const auto &CommandIndex = O.FunctionStartsCommandIndex;
  if (!CommandIndex)
    return;

  const MachO::linkedit_data_command &LinkEditData =
      O.LoadCommands[*CommandIndex].MachOLoadCommand.linkedit_data_command_data;
  const StringRef RawBytes =
      MachOObj.getData().substr(LinkEditData.dataoff, LinkEditData.datasize);

  O.FunctionStarts.Data = arrayRefFromStringRef(RawBytes);
}
255+
256+
// Appends every entry of the input's indirect symbol table, as described by
// its dysymtab load command, to O.IndirectSymTable.
void MachOReader::readIndirectSymbolTable(Object &O) const {
  const MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
  auto &Entries = O.IndirectSymTable.Symbols;

  for (uint32_t Index = 0, Count = DySymTab.nindirectsyms; Index != Count;
       ++Index) {
    Entries.push_back(MachOObj.getIndirectSymbolTableEntry(DySymTab, Index));
  }
}
262+
225263
std::unique_ptr<Object> MachOReader::create() const {
226264
auto Obj = std::make_unique<Object>();
227265
readHeader(*Obj);
@@ -233,6 +271,9 @@ std::unique_ptr<Object> MachOReader::create() const {
233271
readWeakBindInfo(*Obj);
234272
readLazyBindInfo(*Obj);
235273
readExportInfo(*Obj);
274+
readDataInCodeData(*Obj);
275+
readFunctionStartsData(*Obj);
276+
readIndirectSymbolTable(*Obj);
236277
return Obj;
237278
}
238279

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOReader.h

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class MachOReader : public Reader {
3636
void readWeakBindInfo(Object &O) const;
3737
void readLazyBindInfo(Object &O) const;
3838
void readExportInfo(Object &O) const;
39+
void readDataInCodeData(Object &O) const;
40+
void readFunctionStartsData(Object &O) const;
41+
void readIndirectSymbolTable(Object &O) const;
3942

4043
public:
4144
explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp

+101-200
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "MachOWriter.h"
10+
#include "MachOLayoutBuilder.h"
1011
#include "Object.h"
1112
#include "llvm/ADT/STLExtras.h"
1213
#include "llvm/BinaryFormat/MachO.h"
@@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const {
4041
const MachO::symtab_command &SymTabCommand =
4142
O.LoadCommands[*O.SymTabCommandIndex]
4243
.MachOLoadCommand.symtab_command_data;
43-
if (SymTabCommand.symoff) {
44-
assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
45-
"Incorrect number of symbols");
44+
if (SymTabCommand.symoff)
4645
Ends.push_back(SymTabCommand.symoff + symTableSize());
47-
}
48-
if (SymTabCommand.stroff) {
49-
assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
50-
"Incorrect string table size");
46+
if (SymTabCommand.stroff)
5147
Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
52-
}
5348
}
5449
if (O.DyLdInfoCommandIndex) {
5550
const MachO::dyld_info_command &DyLdInfoCommand =
@@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const {
8479
}
8580
}
8681

82+
if (O.DySymTabCommandIndex) {
83+
const MachO::dysymtab_command &DySymTabCommand =
84+
O.LoadCommands[*O.DySymTabCommandIndex]
85+
.MachOLoadCommand.dysymtab_command_data;
86+
87+
if (DySymTabCommand.indirectsymoff)
88+
Ends.push_back(DySymTabCommand.indirectsymoff +
89+
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
90+
}
91+
92+
if (O.DataInCodeCommandIndex) {
93+
const MachO::linkedit_data_command &LinkEditDataCommand =
94+
O.LoadCommands[*O.DataInCodeCommandIndex]
95+
.MachOLoadCommand.linkedit_data_command_data;
96+
97+
if (LinkEditDataCommand.dataoff)
98+
Ends.push_back(LinkEditDataCommand.dataoff +
99+
LinkEditDataCommand.datasize);
100+
}
101+
102+
if (O.FunctionStartsCommandIndex) {
103+
const MachO::linkedit_data_command &LinkEditDataCommand =
104+
O.LoadCommands[*O.FunctionStartsCommandIndex]
105+
.MachOLoadCommand.linkedit_data_command_data;
106+
107+
if (LinkEditDataCommand.dataoff)
108+
Ends.push_back(LinkEditDataCommand.dataoff +
109+
LinkEditDataCommand.datasize);
110+
}
111+
87112
// Otherwise, use the last section / relocation.
88113
for (const auto &LC : O.LoadCommands)
89114
for (const auto &S : LC.Sections) {
@@ -120,14 +145,6 @@ void MachOWriter::writeHeader() {
120145
memcpy(B.getBufferStart(), &Header, HeaderSize);
121146
}
122147

123-
void MachOWriter::updateSymbolIndexes() {
124-
uint32_t Index = 0;
125-
for (auto &Symbol : O.SymTable.Symbols) {
126-
Symbol->Index = Index;
127-
Index++;
128-
}
129-
}
130-
131148
void MachOWriter::writeLoadCommands() {
132149
uint8_t *Begin = B.getBufferStart() + headerSize();
133150
for (const auto &LC : O.LoadCommands) {
@@ -261,7 +278,7 @@ void MachOWriter::writeSymbolTable() {
261278
.MachOLoadCommand.symtab_command_data;
262279

263280
uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
264-
StrTableBuilder.write(StrTable);
281+
LayoutBuilder.getStringTableBuilder().write(StrTable);
265282
}
266283

267284
void MachOWriter::writeStringTable() {
@@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() {
275292
for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
276293
Iter != End; Iter++) {
277294
SymbolEntry *Sym = Iter->get();
278-
auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
295+
uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
279296

280297
if (Is64Bit)
281298
writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
@@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() {
344361
memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
345362
}
346363

364+
void MachOWriter::writeIndirectSymbolTable() {
365+
if (!O.DySymTabCommandIndex)
366+
return;
367+
368+
const MachO::dysymtab_command &DySymTabCommand =
369+
O.LoadCommands[*O.DySymTabCommandIndex]
370+
.MachOLoadCommand.dysymtab_command_data;
371+
372+
char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
373+
assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
374+
"Incorrect indirect symbol table size");
375+
memcpy(Out, O.IndirectSymTable.Symbols.data(),
376+
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
377+
}
378+
379+
void MachOWriter::writeDataInCodeData() {
380+
if (!O.DataInCodeCommandIndex)
381+
return;
382+
const MachO::linkedit_data_command &LinkEditDataCommand =
383+
O.LoadCommands[*O.DataInCodeCommandIndex]
384+
.MachOLoadCommand.linkedit_data_command_data;
385+
char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
386+
assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
387+
"Incorrect data in code data size");
388+
memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
389+
}
390+
391+
void MachOWriter::writeFunctionStartsData() {
392+
if (!O.FunctionStartsCommandIndex)
393+
return;
394+
const MachO::linkedit_data_command &LinkEditDataCommand =
395+
O.LoadCommands[*O.FunctionStartsCommandIndex]
396+
.MachOLoadCommand.linkedit_data_command_data;
397+
char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
398+
assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
399+
"Incorrect function starts data size");
400+
memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
401+
}
402+
347403
void MachOWriter::writeTail() {
348404
typedef void (MachOWriter::*WriteHandlerType)(void);
349405
typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -379,206 +435,51 @@ void MachOWriter::writeTail() {
379435
{DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
380436
}
381437

382-
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
383-
return LHS.first < RHS.first;
384-
});
385-
386-
for (auto WriteOp : Queue)
387-
(this->*WriteOp.second)();
388-
}
389-
390-
void MachOWriter::updateSizeOfCmds() {
391-
auto Size = 0;
392-
for (const auto &LC : O.LoadCommands) {
393-
auto &MLC = LC.MachOLoadCommand;
394-
auto cmd = MLC.load_command_data.cmd;
395-
396-
switch (cmd) {
397-
case MachO::LC_SEGMENT:
398-
Size += sizeof(MachO::segment_command) +
399-
sizeof(MachO::section) * LC.Sections.size();
400-
continue;
401-
case MachO::LC_SEGMENT_64:
402-
Size += sizeof(MachO::segment_command_64) +
403-
sizeof(MachO::section_64) * LC.Sections.size();
404-
continue;
405-
}
406-
407-
switch (cmd) {
408-
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
409-
case MachO::LCName: \
410-
Size += sizeof(MachO::LCStruct); \
411-
break;
412-
#include "llvm/BinaryFormat/MachO.def"
413-
#undef HANDLE_LOAD_COMMAND
414-
}
415-
}
416-
417-
O.Header.SizeOfCmds = Size;
418-
}
419-
420-
// Updates the index and the number of local/external/undefined symbols. Here we
421-
// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table
422-
// are already sorted by the those types.
423-
void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
424-
uint32_t NumLocalSymbols = 0;
425-
auto Iter = O.SymTable.Symbols.begin();
426-
auto End = O.SymTable.Symbols.end();
427-
for (; Iter != End; Iter++) {
428-
if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
429-
break;
430-
431-
NumLocalSymbols++;
432-
}
433-
434-
uint32_t NumExtDefSymbols = 0;
435-
for (; Iter != End; Iter++) {
436-
if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
437-
break;
438+
if (O.DySymTabCommandIndex) {
439+
const MachO::dysymtab_command &DySymTabCommand =
440+
O.LoadCommands[*O.DySymTabCommandIndex]
441+
.MachOLoadCommand.dysymtab_command_data;
438442

439-
NumExtDefSymbols++;
443+
if (DySymTabCommand.indirectsymoff)
444+
Queue.emplace_back(DySymTabCommand.indirectsymoff,
445+
&MachOWriter::writeIndirectSymbolTable);
440446
}
441447

442-
MLC.dysymtab_command_data.ilocalsym = 0;
443-
MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
444-
MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
445-
MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
446-
MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
447-
MLC.dysymtab_command_data.nundefsym =
448-
O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
449-
}
450-
451-
// Recomputes and updates offset and size fields in load commands and sections
452-
// since they could be modified.
453-
Error MachOWriter::layout() {
454-
auto SizeOfCmds = loadCommandsSize();
455-
auto Offset = headerSize() + SizeOfCmds;
456-
O.Header.NCmds = O.LoadCommands.size();
457-
O.Header.SizeOfCmds = SizeOfCmds;
458-
459-
// Lay out sections.
460-
for (auto &LC : O.LoadCommands) {
461-
uint64_t FileOff = Offset;
462-
uint64_t VMSize = 0;
463-
uint64_t FileOffsetInSegment = 0;
464-
for (auto &Sec : LC.Sections) {
465-
if (!Sec.isVirtualSection()) {
466-
auto FilePaddingSize =
467-
OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
468-
Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
469-
Sec.Size = Sec.Content.size();
470-
FileOffsetInSegment += FilePaddingSize + Sec.Size;
471-
}
472-
473-
VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
474-
}
475-
476-
// TODO: Handle the __PAGEZERO segment.
477-
auto &MLC = LC.MachOLoadCommand;
478-
switch (MLC.load_command_data.cmd) {
479-
case MachO::LC_SEGMENT:
480-
MLC.segment_command_data.cmdsize =
481-
sizeof(MachO::segment_command) +
482-
sizeof(MachO::section) * LC.Sections.size();
483-
MLC.segment_command_data.nsects = LC.Sections.size();
484-
MLC.segment_command_data.fileoff = FileOff;
485-
MLC.segment_command_data.vmsize = VMSize;
486-
MLC.segment_command_data.filesize = FileOffsetInSegment;
487-
break;
488-
case MachO::LC_SEGMENT_64:
489-
MLC.segment_command_64_data.cmdsize =
490-
sizeof(MachO::segment_command_64) +
491-
sizeof(MachO::section_64) * LC.Sections.size();
492-
MLC.segment_command_64_data.nsects = LC.Sections.size();
493-
MLC.segment_command_64_data.fileoff = FileOff;
494-
MLC.segment_command_64_data.vmsize = VMSize;
495-
MLC.segment_command_64_data.filesize = FileOffsetInSegment;
496-
break;
497-
}
448+
if (O.DataInCodeCommandIndex) {
449+
const MachO::linkedit_data_command &LinkEditDataCommand =
450+
O.LoadCommands[*O.DataInCodeCommandIndex]
451+
.MachOLoadCommand.linkedit_data_command_data;
498452

499-
Offset += FileOffsetInSegment;
453+
if (LinkEditDataCommand.dataoff)
454+
Queue.emplace_back(LinkEditDataCommand.dataoff,
455+
&MachOWriter::writeDataInCodeData);
500456
}
501457

502-
// Lay out relocations.
503-
for (auto &LC : O.LoadCommands)
504-
for (auto &Sec : LC.Sections) {
505-
Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
506-
Sec.NReloc = Sec.Relocations.size();
507-
Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
508-
}
458+
if (O.FunctionStartsCommandIndex) {
459+
const MachO::linkedit_data_command &LinkEditDataCommand =
460+
O.LoadCommands[*O.FunctionStartsCommandIndex]
461+
.MachOLoadCommand.linkedit_data_command_data;
509462

510-
// Lay out tail stuff.
511-
auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
512-
for (auto &LC : O.LoadCommands) {
513-
auto &MLC = LC.MachOLoadCommand;
514-
auto cmd = MLC.load_command_data.cmd;
515-
switch (cmd) {
516-
case MachO::LC_SYMTAB:
517-
MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
518-
MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
519-
MLC.symtab_command_data.symoff = Offset;
520-
Offset += NListSize * MLC.symtab_command_data.nsyms;
521-
MLC.symtab_command_data.stroff = Offset;
522-
Offset += MLC.symtab_command_data.strsize;
523-
break;
524-
case MachO::LC_DYSYMTAB: {
525-
if (MLC.dysymtab_command_data.ntoc != 0 ||
526-
MLC.dysymtab_command_data.nmodtab != 0 ||
527-
MLC.dysymtab_command_data.nextrefsyms != 0 ||
528-
MLC.dysymtab_command_data.nlocrel != 0 ||
529-
MLC.dysymtab_command_data.nextrel != 0)
530-
return createStringError(llvm::errc::not_supported,
531-
"shared library is not yet supported");
532-
533-
if (MLC.dysymtab_command_data.nindirectsyms != 0)
534-
return createStringError(llvm::errc::not_supported,
535-
"indirect symbol table is not yet supported");
536-
537-
updateDySymTab(MLC);
538-
break;
539-
}
540-
case MachO::LC_SEGMENT:
541-
case MachO::LC_SEGMENT_64:
542-
case MachO::LC_VERSION_MIN_MACOSX:
543-
case MachO::LC_BUILD_VERSION:
544-
case MachO::LC_ID_DYLIB:
545-
case MachO::LC_LOAD_DYLIB:
546-
case MachO::LC_UUID:
547-
case MachO::LC_SOURCE_VERSION:
548-
// Nothing to update.
549-
break;
550-
default:
551-
// Abort if it's unsupported in order to prevent corrupting the object.
552-
return createStringError(llvm::errc::not_supported,
553-
"unsupported load command (cmd=0x%x)", cmd);
554-
}
463+
if (LinkEditDataCommand.dataoff)
464+
Queue.emplace_back(LinkEditDataCommand.dataoff,
465+
&MachOWriter::writeFunctionStartsData);
555466
}
556467

557-
return Error::success();
558-
}
468+
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
469+
return LHS.first < RHS.first;
470+
});
559471

560-
void MachOWriter::constructStringTable() {
561-
for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
562-
StrTableBuilder.add(Sym->Name);
563-
StrTableBuilder.finalize();
472+
for (auto WriteOp : Queue)
473+
(this->*WriteOp.second)();
564474
}
565475

566-
Error MachOWriter::finalize() {
567-
updateSizeOfCmds();
568-
constructStringTable();
569-
570-
if (auto E = layout())
571-
return E;
572-
573-
return Error::success();
574-
}
476+
Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
575477

576478
Error MachOWriter::write() {
577479
if (Error E = B.allocate(totalSize()))
578480
return E;
579481
memset(B.getBufferStart(), 0, totalSize());
580482
writeHeader();
581-
updateSymbolIndexes();
582483
writeLoadCommands();
583484
writeSections();
584485
writeTail();

Diff for: ‎llvm/tools/llvm-objcopy/MachO/MachOWriter.h

+10-9
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "../Buffer.h"
10+
#include "MachOLayoutBuilder.h"
1011
#include "MachOObjcopy.h"
1112
#include "Object.h"
1213
#include "llvm/BinaryFormat/MachO.h"
@@ -22,20 +23,15 @@ class MachOWriter {
2223
Object &O;
2324
bool Is64Bit;
2425
bool IsLittleEndian;
26+
uint64_t PageSize;
2527
Buffer &B;
26-
StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
28+
MachOLayoutBuilder LayoutBuilder;
2729

2830
size_t headerSize() const;
2931
size_t loadCommandsSize() const;
3032
size_t symTableSize() const;
3133
size_t strTableSize() const;
3234

33-
void updateDySymTab(MachO::macho_load_command &MLC);
34-
void updateSizeOfCmds();
35-
void updateSymbolIndexes();
36-
void constructStringTable();
37-
Error layout();
38-
3935
void writeHeader();
4036
void writeLoadCommands();
4137
template <typename StructType>
@@ -48,11 +44,16 @@ class MachOWriter {
4844
void writeWeakBindInfo();
4945
void writeLazyBindInfo();
5046
void writeExportInfo();
47+
void writeIndirectSymbolTable();
48+
void writeDataInCodeData();
49+
void writeFunctionStartsData();
5150
void writeTail();
5251

5352
public:
54-
MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
55-
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
53+
MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
54+
Buffer &B)
55+
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
56+
PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {}
5657

5758
size_t totalSize() const;
5859
Error finalize();

Diff for: ‎llvm/tools/llvm-objcopy/MachO/Object.h

+27
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,16 @@ struct SymbolEntry {
9090
uint8_t n_sect;
9191
uint16_t n_desc;
9292
uint64_t n_value;
93+
94+
bool isExternalSymbol() const {
95+
return n_type & ((MachO::N_EXT | MachO::N_PEXT));
96+
}
97+
98+
bool isLocalSymbol() const { return !isExternalSymbol(); }
99+
100+
bool isUndefinedSymbol() const {
101+
return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
102+
}
93103
};
94104

95105
/// The location of the symbol table inside the binary is described by LC_SYMTAB
@@ -100,6 +110,10 @@ struct SymbolTable {
100110
const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
101111
};
102112

113+
struct IndirectSymbolTable {
114+
std::vector<uint32_t> Symbols;
115+
};
116+
103117
/// The location of the string table inside the binary is described by LC_SYMTAB
104118
/// load command.
105119
struct StringTable {
@@ -206,6 +220,10 @@ struct ExportInfo {
206220
ArrayRef<uint8_t> Trie;
207221
};
208222

223+
struct LinkData {
224+
ArrayRef<uint8_t> Data;
225+
};
226+
209227
struct Object {
210228
MachHeader Header;
211229
std::vector<LoadCommand> LoadCommands;
@@ -218,11 +236,20 @@ struct Object {
218236
WeakBindInfo WeakBinds;
219237
LazyBindInfo LazyBinds;
220238
ExportInfo Exports;
239+
IndirectSymbolTable IndirectSymTable;
240+
LinkData DataInCode;
241+
LinkData FunctionStarts;
221242

222243
/// The index of LC_SYMTAB load command if present.
223244
Optional<size_t> SymTabCommandIndex;
224245
/// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
225246
Optional<size_t> DyLdInfoCommandIndex;
247+
/// The index of LC_DYSYMTAB load command if present.
248+
Optional<size_t> DySymTabCommandIndex;
249+
/// The index of LC_DATA_IN_CODE load command if present.
250+
Optional<size_t> DataInCodeCommandIndex;
251+
/// The index of LC_FUNCTION_STARTS load command if present.
252+
Optional<size_t> FunctionStartsCommandIndex;
226253
};
227254

228255
} // end namespace macho

0 commit comments

Comments
 (0)
Please sign in to comment.