diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -276,7 +276,8 @@
 };
 struct OpcodeTmp {
   uint8_t opcode = 0xF0;
-  uint64_t spread = 0; // Placeholder for offset or addend
+  uint64_t delta = 0; // Placeholder for offset or addend
+  uint64_t consecutiveCount = 0;
 };
 } // namespace
 
@@ -295,7 +296,7 @@
     OpcodeTmp op = {
         static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
                              seg->index), /* opcode */
-        offset /* spread */
+        offset /* delta */
     };
     opcodes.push_back(op);
     lastBinding.segment = seg;
@@ -303,7 +304,7 @@
   } else if (lastBinding.offset != offset) {
     OpcodeTmp op = {
         static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB), /* opcode */
-        offset - lastBinding.offset /* spread */
+        offset - lastBinding.offset /* delta */
     };
     opcodes.push_back(op);
     lastBinding.offset = offset;
@@ -312,7 +313,7 @@
   if (lastBinding.addend != addend) {
     OpcodeTmp op = {
         static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB), /* opcode */
-        static_cast<uint64_t>(addend) /* spread */
+        static_cast<uint64_t>(addend) /* delta */
     };
     opcodes.push_back(op);
     lastBinding.addend = addend;
@@ -320,28 +321,92 @@
 
   OpcodeTmp op = {
       static_cast<uint8_t>(BIND_OPCODE_DO_BIND), /* opcode */
-      0 /* spread */
+      0 /* delta */
   };
   opcodes.push_back(op);
   // DO_BIND causes dyld to both perform the binding and increment the offset
   lastBinding.offset += target->wordSize;
 }
 
+// Rewrite the bind opcodes of one symbol in place into a shorter sequence
+// that makes dyld perform exactly the same bindings.
+static void optimizeOpcodes(std::vector<OpcodeTmp> &opcodes) {
+  // Pass 1: Fold every DO_BIND that is directly followed by ADD_ADDR_ULEB
+  // into a single DO_BIND_ADD_ADDR_ULEB.
+  size_t write = 0;
+  for (size_t read = 0; read < opcodes.size(); ++read, ++write) {
+    OpcodeTmp op = opcodes[read];
+    if (op.opcode == BIND_OPCODE_DO_BIND && read + 1 < opcodes.size() &&
+        opcodes[read + 1].opcode == BIND_OPCODE_ADD_ADDR_ULEB) {
+      op.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
+      op.delta = opcodes[read + 1].delta;
+      ++read; // The ADD_ADDR_ULEB is consumed together with the DO_BIND.
+    }
+    opcodes[write] = op;
+  }
+  // `write` counts the surviving opcodes exactly, so an empty input stays
+  // empty and no stale entry is kept when the final pair was merged.
+  opcodes.resize(write);
+
+  // Pass 2: Collapse each run of two or more identical DO_BIND_ADD_ADDR_ULEB
+  // into one DO_BIND_ULEB_TIMES_SKIPPING_ULEB that carries the run length.
+  write = 0;
+  for (size_t read = 0; read < opcodes.size(); ++write) {
+    OpcodeTmp op = opcodes[read];
+    size_t runEnd = read + 1;
+    if (op.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) {
+      while (runEnd < opcodes.size() &&
+             opcodes[runEnd].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB &&
+             opcodes[runEnd].delta == op.delta)
+        ++runEnd;
+    }
+    if (runEnd - read >= 2) {
+      op.opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
+      op.consecutiveCount = runEnd - read;
+    }
+    opcodes[write] = op;
+    read = runEnd;
+  }
+  opcodes.resize(write);
+
+  // Pass 3: Use the one-byte immediate form when the skip is a whole number
+  // of words that is below BIND_IMMEDIATE_MASK. The immediate is scaled by
+  // the pointer size when it is decoded, hence target->wordSize and not a
+  // fixed 8.
+  for (OpcodeTmp &op : opcodes) {
+    if (op.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB &&
+        op.delta % target->wordSize == 0 &&
+        op.delta / target->wordSize < BIND_IMMEDIATE_MASK) {
+      op.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
+      op.delta /= target->wordSize;
+    }
+  }
+}
+
 static void flushOpcodes(OpcodeTmp &op, raw_svector_ostream &os) {
   uint8_t opcode = op.opcode & 0xF0;
   switch (opcode) {
   case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
   case BIND_OPCODE_ADD_ADDR_ULEB:
+  case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
     os << op.opcode;
-    encodeULEB128(op.spread, os);
+    encodeULEB128(op.delta, os);
     break;
   case BIND_OPCODE_SET_ADDEND_SLEB:
     os << op.opcode;
-    encodeSLEB128(static_cast<int64_t>(op.spread), os);
+    encodeSLEB128(static_cast<int64_t>(op.delta), os);
     break;
   case BIND_OPCODE_DO_BIND:
     os << op.opcode;
     break;
+  case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
+    os << op.opcode;
+    encodeULEB128(op.consecutiveCount, os);
+    encodeULEB128(op.delta, os);
+    break;
+  case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
+    os << static_cast<uint8_t>(op.opcode | op.delta);
+    break;
   default:
     llvm_unreachable("cannot bind to an unrecognized symbol");
   }
@@ -441,6 +506,7 @@
       encodeBinding(b.target.isec->parent,
                     b.target.isec->getOffset(b.target.offset), b.addend,
                     lastBinding, opcodes);
+    optimizeOpcodes(opcodes);
     for(auto &op : opcodes)
       flushOpcodes(op, os);
   }
diff --git a/lld/test/MachO/bind-opcodes.s b/lld/test/MachO/bind-opcodes.s
--- a/lld/test/MachO/bind-opcodes.s
+++ b/lld/test/MachO/bind-opcodes.s
@@ -5,25 +5,50 @@
 # RUN: %lld -dylib %t/foo.o -o %t/libfoo.dylib
 # RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test
 
-## Make sure we emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per
-## symbol.
-# RUN: obj2yaml %t/test | FileCheck %s --implicit-check-not BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
+## Test:
+## 1/ We emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per symbol.
+## 2/ Combine BIND_OPCODE_DO_BIND and BIND_OPCODE_ADD_ADDR_ULEB pairs.
+## 3/ Compact BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
+## 4/ Use BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED if possible.
+# RUN: obj2yaml %t/test | FileCheck %s --implicit-check-not BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM --implicit-check-not BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB --implicit-check-not BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB --implicit-check-not BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
 
 # CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
 # CHECK-NEXT: Imm: 0
 # CHECK-NEXT: Symbol: _foo
 
+# CHECK: Opcode: BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
+# CHECK-NEXT: Imm: 0
+# CHECK-NEXT: ULEBExtraData: [ 0x2, 0x8 ]
+# CHECK-NEXT: Symbol: ''
+
+# CHECK: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
+# CHECK-NEXT: Imm: 0
+# CHECK-NEXT: ULEBExtraData: [ 0x1008 ]
+# CHECK-NEXT: Symbol: ''
+
 # CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
 # CHECK-NEXT: Imm: 0
 # CHECK-NEXT: Symbol: _bar
 
+# CHECK: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
+# CHECK-NEXT: Imm: 1
+# CHECK-NEXT: Symbol: ''
+
+# CHECK: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
+# CHECK-NEXT: Imm: 0
+# CHECK-NEXT: ULEBExtraData: [ 0x1008 ]
+# CHECK-NEXT: Symbol: ''
+
 # RUN: llvm-objdump --macho --bind %t/test | FileCheck %s --check-prefix=BIND
 # BIND: Bind table:
-# BIND-NEXT: segment section address type addend dylib symbol
-# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo
-# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo
-# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar
-# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar
+# BIND-NEXT: segment section address type addend dylib symbol
+# BIND-NEXT: __DATA __data 0x100001000 pointer 0 libfoo _foo
+# BIND-NEXT: __DATA __data 0x100001010 pointer 0 libfoo _foo
+# BIND-NEXT: __DATA __data 0x100001020 pointer 1 libfoo _foo
+# BIND-NEXT: __DATA __data 0x100002030 pointer 0 libfoo _foo
+# BIND-NEXT: __DATA __data 0x100001008 pointer 0 libfoo _bar
+# BIND-NEXT: __DATA __data 0x100001018 pointer 0 libfoo _bar
+# BIND-NEXT: __DATA __data 0x100002028 pointer 0 libfoo _bar
 # BIND-EMPTY:
 
 #--- foo.s
@@ -39,6 +64,10 @@
 .quad _bar
 .quad _foo
 .quad _bar
+.quad _foo+1
+.zero 0x1000
+.quad _bar
+.quad _foo
 
 .globl _main
 .text