Changeset View
Changeset View
Standalone View
Standalone View
lld/MachO/SyntheticSections.cpp
Show First 20 Lines • Show All 276 Lines • ▼ Show 20 Lines | struct Binding { | ||||
uint64_t offset = 0; | uint64_t offset = 0; | ||||
int64_t addend = 0; | int64_t addend = 0; | ||||
}; | }; | ||||
// Intermediate representation of a single bind opcode. Bindings are first
// encoded into a vector of these so the stream can be rewritten before it is
// serialized.
struct BindIR {
  // Opcode byte to emit. The default of 0xF0 is deliberately not a valid
  // opcode: an entry that was never filled in should make the program scream
  // instead of accidentally writing a "valid" value.
  uint8_t opcode = 0xF0;
  // Primary operand of the opcode (an offset, a delta, or an addend stored as
  // its unsigned bit pattern, depending on the opcode).
  uint64_t data = 0;
  // Repeat count; only serialized for
  // BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB.
  uint64_t consecutiveCount = 0;
};
} // namespace | } // namespace | ||||
// Encode a sequence of opcodes that tell dyld to write the address of symbol + | // Encode a sequence of opcodes that tell dyld to write the address of symbol + | ||||
// addend at osec->addr + outSecOff. | // addend at osec->addr + outSecOff. | ||||
// | // | ||||
// The bind opcode "interpreter" remembers the values of each binding field, so | // The bind opcode "interpreter" remembers the values of each binding field, so | ||||
// we only need to encode the differences between bindings. Hence the use of | // we only need to encode the differences between bindings. Hence the use of | ||||
// lastBinding. | // lastBinding. | ||||
static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, | static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, | ||||
int64_t addend, Binding &lastBinding, | int64_t addend, Binding &lastBinding, | ||||
std::vector<BindIR> &opcodes) { | std::vector<BindIR> &opcodes) { | ||||
OutputSegment *seg = osec->parent; | OutputSegment *seg = osec->parent; | ||||
uint64_t offset = osec->getSegmentOffset() + outSecOff; | uint64_t offset = osec->getSegmentOffset() + outSecOff; | ||||
if (lastBinding.segment != seg) { | if (lastBinding.segment != seg) { | ||||
BindIR op = { | opcodes.push_back( | ||||
static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | | {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | | ||||
seg->index), // opcode | seg->index), | ||||
offset // data | offset}); | ||||
}; | |||||
opcodes.push_back(op); | |||||
lastBinding.segment = seg; | lastBinding.segment = seg; | ||||
lastBinding.offset = offset; | lastBinding.offset = offset; | ||||
} else if (lastBinding.offset != offset) { | } else if (lastBinding.offset != offset) { | ||||
BindIR op = { | opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset}); | ||||
static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB), // opcode | |||||
offset - lastBinding.offset // data | |||||
}; | |||||
opcodes.push_back(op); | |||||
lastBinding.offset = offset; | lastBinding.offset = offset; | ||||
} | } | ||||
if (lastBinding.addend != addend) { | if (lastBinding.addend != addend) { | ||||
BindIR op = { | opcodes.push_back( | ||||
static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB), // opcode | {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)}); | ||||
static_cast<uint64_t>(addend) // data | |||||
}; | |||||
opcodes.push_back(op); | |||||
lastBinding.addend = addend; | lastBinding.addend = addend; | ||||
} | } | ||||
BindIR op = { | opcodes.push_back({BIND_OPCODE_DO_BIND, 0}); | ||||
static_cast<uint8_t>(BIND_OPCODE_DO_BIND), // opcode | |||||
0 // data | |||||
}; | |||||
opcodes.push_back(op); | |||||
// DO_BIND causes dyld to both perform the binding and increment the offset | // DO_BIND causes dyld to both perform the binding and increment the offset | ||||
lastBinding.offset += target->wordSize; | lastBinding.offset += target->wordSize; | ||||
} | } | ||||
static void optimizeOpcodes(std::vector<BindIR> &opcodes) { | |||||
// Pass 1: Combine bind/add pairs | |||||
size_t i; | |||||
int pWrite = 0; | |||||
for (i = 1; i < opcodes.size(); ++i, ++pWrite) { | |||||
if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) && | |||||
(opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) { | |||||
opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB; | |||||
opcodes[pWrite].data = opcodes[i].data; | |||||
++i; | |||||
} else { | |||||
opcodes[pWrite] = opcodes[i - 1]; | |||||
} | |||||
} | |||||
if (i == opcodes.size()) | |||||
opcodes[pWrite] = opcodes[i - 1]; | |||||
opcodes.resize(pWrite + 1); | |||||
// Pass 2: Compress two or more bind_add opcodes | |||||
pWrite = 0; | |||||
for (i = 1; i < opcodes.size(); ++i, ++pWrite) { | |||||
if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && | |||||
(opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && | |||||
(opcodes[i].data == opcodes[i - 1].data)) { | |||||
opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB; | |||||
opcodes[pWrite].consecutiveCount = 2; | |||||
opcodes[pWrite].data = opcodes[i].data; | |||||
++i; | |||||
while (i < opcodes.size() && | |||||
(opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) && | |||||
(opcodes[i].data == opcodes[i - 1].data)) { | |||||
opcodes[pWrite].consecutiveCount++; | |||||
++i; | |||||
} | |||||
} else { | |||||
opcodes[pWrite] = opcodes[i - 1]; | |||||
} | |||||
} | |||||
if (i == opcodes.size()) | |||||
opcodes[pWrite] = opcodes[i - 1]; | |||||
opcodes.resize(pWrite + 1); | |||||
} | |||||
int3: nit: LLVM convention is to omit braces around single-statement bodies (one-liners) of if/else/loop statements
static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) { | static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) { | ||||
uint8_t opcode = op.opcode & BIND_OPCODE_MASK; | uint8_t opcode = op.opcode & BIND_OPCODE_MASK; | ||||
switch (opcode) { | switch (opcode) { | ||||
case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: | case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: | ||||
case BIND_OPCODE_ADD_ADDR_ULEB: | case BIND_OPCODE_ADD_ADDR_ULEB: | ||||
case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: | |||||
os << op.opcode; | os << op.opcode; | ||||
encodeULEB128(op.data, os); | encodeULEB128(op.data, os); | ||||
break; | break; | ||||
case BIND_OPCODE_SET_ADDEND_SLEB: | case BIND_OPCODE_SET_ADDEND_SLEB: | ||||
os << op.opcode; | os << op.opcode; | ||||
encodeSLEB128(static_cast<int64_t>(op.data), os); | encodeSLEB128(static_cast<int64_t>(op.data), os); | ||||
break; | break; | ||||
case BIND_OPCODE_DO_BIND: | case BIND_OPCODE_DO_BIND: | ||||
os << op.opcode; | os << op.opcode; | ||||
break; | break; | ||||
case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: | |||||
os << op.opcode; | |||||
encodeULEB128(op.consecutiveCount, os); | |||||
encodeULEB128(op.data, os); | |||||
break; | |||||
default: | default: | ||||
llvm_unreachable("cannot bind to an unrecognized symbol"); | llvm_unreachable("cannot bind to an unrecognized symbol"); | ||||
} | } | ||||
} | } | ||||
// Non-weak bindings need to have their dylib ordinal encoded as well. | // Non-weak bindings need to have their dylib ordinal encoded as well. | ||||
static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { | static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { | ||||
if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()) | if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()) | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | if (ordinal != lastOrdinal) { | ||||
encodeDylibOrdinal(ordinal, os); | encodeDylibOrdinal(ordinal, os); | ||||
lastOrdinal = ordinal; | lastOrdinal = ordinal; | ||||
} | } | ||||
std::vector<BindIR> opcodes; | std::vector<BindIR> opcodes; | ||||
for (const BindingEntry &b : bindings) | for (const BindingEntry &b : bindings) | ||||
encodeBinding(b.target.isec->parent, | encodeBinding(b.target.isec->parent, | ||||
b.target.isec->getOffset(b.target.offset), b.addend, | b.target.isec->getOffset(b.target.offset), b.addend, | ||||
lastBinding, opcodes); | lastBinding, opcodes); | ||||
if (config->optimize > 1) | |||||
optimizeOpcodes(opcodes); | |||||
for (const auto &op : opcodes) | for (const auto &op : opcodes) | ||||
flushOpcodes(op, os); | flushOpcodes(op, os); | ||||
} | } | ||||
if (!bindingsMap.empty()) | if (!bindingsMap.empty()) | ||||
os << static_cast<uint8_t>(BIND_OPCODE_DONE); | os << static_cast<uint8_t>(BIND_OPCODE_DONE); | ||||
} | } | ||||
void BindingSection::writeTo(uint8_t *buf) const { | void BindingSection::writeTo(uint8_t *buf) const { | ||||
Show All 16 Lines | for (auto &p : sortBindings(bindingsMap)) { | ||||
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) | os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) | ||||
<< sym->getName() << '\0' | << sym->getName() << '\0' | ||||
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); | << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); | ||||
std::vector<BindIR> opcodes; | std::vector<BindIR> opcodes; | ||||
for (const BindingEntry &b : bindings) | for (const BindingEntry &b : bindings) | ||||
encodeBinding(b.target.isec->parent, | encodeBinding(b.target.isec->parent, | ||||
b.target.isec->getOffset(b.target.offset), b.addend, | b.target.isec->getOffset(b.target.offset), b.addend, | ||||
lastBinding, opcodes); | lastBinding, opcodes); | ||||
if (config->optimize > 1) | |||||
optimizeOpcodes(opcodes); | |||||
for (const auto &op : opcodes) | for (const auto &op : opcodes) | ||||
flushOpcodes(op, os); | flushOpcodes(op, os); | ||||
} | } | ||||
if (!bindingsMap.empty() || !definitions.empty()) | if (!bindingsMap.empty() || !definitions.empty()) | ||||
os << static_cast<uint8_t>(BIND_OPCODE_DONE); | os << static_cast<uint8_t>(BIND_OPCODE_DONE); | ||||
} | } | ||||
void WeakBindingSection::writeTo(uint8_t *buf) const { | void WeakBindingSection::writeTo(uint8_t *buf) const { | ||||
▲ Show 20 Lines • Show All 962 Lines • Show Last 20 Lines |
nit: LLVM convention is to omit braces around single-statement bodies (one-liners) of if/else/loop statements