diff --git a/lld/Common/Args.cpp b/lld/Common/Args.cpp
--- a/lld/Common/Args.cpp
+++ b/lld/Common/Args.cpp
@@ -44,6 +44,20 @@
   return 0;
 }
 
+double lld::args::getDouble(opt::InputArgList &args, unsigned key, int64_t Default) {
+  auto *a = args.getLastArg(key);
+  if (!a)
+    return Default;
+
+  double v;
+  if (to_float(a->getValue(), v))
+    return v;
+
+  StringRef spelling = args.getArgString(a->getIndex());
+  error(spelling + ": float expected, but got '" + a->getValue() + "'");
+  return 0;
+}
+
 int64_t lld::args::getInteger(opt::InputArgList &args, unsigned key,
                               int64_t Default) {
   return ::getInteger(args, key, Default, 10);
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -254,6 +254,9 @@
   ELFKind ekind = ELFNoneKind;
   uint16_t emachine = llvm::ELF::EM_NONE;
   llvm::Optional<uint64_t> imageBase;
+  double markLivePc = 1;
+  uint64_t numSeen = 0;
+  uint64_t numMarked = 0;
   uint64_t commonPageSize;
   uint64_t maxPageSize;
   uint64_t mipsGotSize;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1181,6 +1181,9 @@
     error("unknown -hash-style: " + s);
   }
 
+  if (args.hasArg(OPT_mark_live_pc))
+    config->markLivePc = args::getDouble(args, OPT_mark_live_pc, 1);
+
   if (args.hasArg(OPT_print_map))
     config->mapFile = "-";
 
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -679,6 +679,11 @@
     if (!linkSec)
       fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
 
+    if (linkSec == &InputSection::discarded) {
+      this->sections[i] = &InputSection::discarded;
+      continue;
+    }
+
     // A SHF_LINK_ORDER section is discarded if its linked-to section is
     // discarded.
     InputSection *isec = cast<InputSection>(this->sections[i]);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -662,6 +662,8 @@
   case EM_SPARCV9:
   case EM_386:
   case EM_X86_64:
+    if (!tls)
+      return 0;
     return s.getVA(0) - tls->p_memsz -
            ((-tls->p_vaddr - tls->p_memsz) & (tls->p_align - 1));
   default:
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -90,6 +90,21 @@
 template <class RelTy>
 void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
                                   bool isLSDA) {
+  bool isRegular = isa<InputSection>(sec);
+  if (isRegular) {
+    //lld::outs() << sec.file->getName() << ':' << sec.name << '(' << rel.r_offset << ')';
+    ++config->numSeen;
+    assert(config->numSeen > config->numMarked);
+    if (((double)config->numMarked / double(config->numSeen)) >=
+        config->markLivePc)
+    {
+      //lld::outs() << ": ignored\n";
+      return;
+    }
+    //lld::outs() << ": used\n";
+    ++config->numMarked;
+  }
+
   Symbol &sym = sec.getFile()->getRelocTargetSym(rel);
 
   // If a symbol is referenced in a live section, it is used.
@@ -186,6 +201,8 @@
   // .eh_frame) so we need to add a check.
   if (sec == &InputSection::discarded)
     return;
+  //if (!sec->name.startswith(".debug_"))
+  //  lld::outs() << "enqueue: " + sec->file->getName() + ": " + sec->name + "\n";
 
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
@@ -255,6 +272,9 @@
         scanEhFrameSection(*eh, eh->template rels<ELFT>());
     }
 
+    //if (!sec->name.startswith(".debug_") && sec->file)
+    //  lld::outs() << "section: " + sec->file->getName() + ": " + sec->name + "\n";
+
     if (sec->flags & SHF_LINK_ORDER)
       continue;
 
@@ -273,6 +293,10 @@
   // Mark all reachable sections.
   while (!queue.empty()) {
     InputSectionBase &sec = *queue.pop_back_val();
+    if (sec.name.startswith(".debug_"))
+      continue;
+
+    //lld::outs() << "marking: " + sec.file->getName() + ": " + sec.name + "\n";
 
     if (sec.areRelocsRela) {
       for (const typename ELFT::Rela &rel : sec.template relas<ELFT>())
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -257,6 +257,8 @@
 defm Map: Eq<"Map", "Print a link map to the specified file">;
 
+defm mark_live_pc : Eq<"mark-live-pc", "Percentage of relocations to use in mark live calculations">, MetaVarName<"">;
+
 defm merge_exidx_entries: B<"merge-exidx-entries",
     "Enable merging .ARM.exidx entries (default)",
     "Disable merging .ARM.exidx entries">;
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -119,6 +119,9 @@
   if (script->hasSectionsCommand)
     return s->name;
 
+  if ((s->flags & SHF_ALLOC) == 0)
+    return s->name;
+
   // When no SECTIONS is specified, emulate GNU ld's internal linker scripts
   // by grouping sections with certain prefixes.
 
@@ -1643,35 +1646,6 @@
     if (!config->relocatable && config->emachine == EM_ARM &&
         sec->type == SHT_ARM_EXIDX)
       continue;
-
-    // Link order may be distributed across several InputSectionDescriptions.
-    // Sorting is performed separately.
-    std::vector<InputSection **> scriptSections;
-    std::vector<InputSection *> sections;
-    for (BaseCommand *base : sec->sectionCommands) {
-      auto *isd = dyn_cast<InputSectionDescription>(base);
-      if (!isd)
-        continue;
-      bool hasLinkOrder = false;
-      scriptSections.clear();
-      sections.clear();
-      for (InputSection *&isec : isd->sections) {
-        if (isec->flags & SHF_LINK_ORDER) {
-          InputSection *link = isec->getLinkOrderDep();
-          if (link && !link->getParent())
-            error(toString(isec) + ": sh_link points to discarded section " +
-                  toString(link));
-          hasLinkOrder = true;
-        }
-        scriptSections.push_back(&isec);
-        sections.push_back(isec);
-      }
-      if (hasLinkOrder && errorCount() == 0) {
-        llvm::stable_sort(sections, compareByFilePosition);
-        for (int i = 0, n = sections.size(); i != n; ++i)
-          *scriptSections[i] = sections[i];
-      }
-    }
   }
 }
 
diff --git a/lld/include/lld/Common/Args.h b/lld/include/lld/Common/Args.h
--- a/lld/include/lld/Common/Args.h
+++ b/lld/include/lld/Common/Args.h
@@ -25,11 +25,12 @@
 
 llvm::CodeGenOpt::Level getCGOptLevel(int optLevelLTO);
 
+int64_t getInteger(llvm::opt::InputArgList &args, unsigned key,
+                   int64_t Default, int base);
 int64_t getInteger(llvm::opt::InputArgList &args, unsigned key,
                    int64_t Default);
-
+double getDouble(llvm::opt::InputArgList &args, unsigned key, int64_t Default);
 int64_t getHex(llvm::opt::InputArgList &args, unsigned key, int64_t Default);
-
 std::vector<StringRef> getStrings(llvm::opt::InputArgList &args, int id);
 
 uint64_t getZOptionValue(llvm::opt::InputArgList &args, int id, StringRef key,
diff --git a/llvm/utils/fragment_package.py b/llvm/utils/fragment_package.py
new file mode 100644
--- /dev/null
+++ b/llvm/utils/fragment_package.py
@@ -0,0 +1,755 @@
+from __future__ import print_function
+import argparse
+import json
+import os
+import re
+import shutil
+import struct
+import subprocess
+import tempfile
+import time
+
+cached_extensions = ['.success']
+
+def parse_commandline():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--force-split', help='redo split action', action='store_true')
+    parser.add_argument('--force-archive-split', help='redo split action for archives only', action='store_true')
+    parser.add_argument('--outdir', '-o', help='root directory of output package', default='outdir')
+    parser.add_argument('--bin', help='directory containing required tools', default='.')
+    parser.add_argument('--tmpdir', help='directory to use as a temporary directory for editing')
+    parser.add_argument('input', help='directory or file to copy and fragment')
+    return parser.parse_args()
+
+def find_files(search_path):
+    if os.path.isfile(search_path):
+        return [search_path]
+    objs = []
+    for root, _, files in os.walk(search_path):
+        for f in files:
+            if not any([f.endswith(c) for c in cached_extensions]):
+                objs.append(os.path.join(root, f))
+    return objs
+
+NORMAL_KIND = 0
+ELF_KIND = 1
+ARCHIVE_KIND = 2
+
+def get_file_kind(path):
+    with open(path, 'rb') as fp:
+        magic = fp.read(8)
+    if magic.startswith(b'\x7fELF'):
+        return ELF_KIND
+    elif magic == b'!<arch>\n':
+        return ARCHIVE_KIND
+    # TODO: thin archives?
+    return NORMAL_KIND
+
+def copy_to_dir(src, root, outdir):
+    """
+    Copy src to outdir, maintaining the directory structure relative to root.
+    Create any subdirectories required.
+    """
+    dest = os.path.join(outdir, os.path.relpath(os.path.dirname(src), root))
+    if not os.path.exists(dest):
+        os.makedirs(dest)
+    shutil.copy2(src, dest)
+    return os.path.join(dest, os.path.basename(src))
+
+def extract_members(archive, bindir):
+    """
+    Extract all members from archive using the tools located in bindir.
+    Return the list of members.
+    The archive must be the only file in its directory.
+    """
+    directory = os.path.dirname(archive)
+    assert len(os.listdir(directory)) == 1, "should only have one archive in directory"
+    cwd = os.getcwd()
+    os.chdir(directory)
+    llvm_ar = os.path.join(bindir, 'llvm-ar.exe')
+    archive_name = os.path.basename(archive)
+    subprocess.check_call([llvm_ar, 'x', archive_name])
+    stdout = subprocess.check_output([llvm_ar, 't', archive_name]).decode('utf-8')
+    members = stdout.split('\n')
+    os.chdir(cwd)
+    return [os.path.join(directory, member.strip()) for member in members if len(member.strip()) > 0]
+
+def replace_archive(members, archive, tmpdir, bindir):
+    """
+    Delete and recreate archive, with the specified members, using the tools
+    located at bindir.
+ """ + os.remove(archive) + response_path = os.path.join(tmpdir, 'response.rsp') + assert not os.path.exists(response_path) + with open(response_path, 'w') as response: + response.writelines(['"' + m + '"' + '\n' for m in members]) + subprocess.check_call([os.path.join(bindir, 'llvm-ar.exe'), 'rc', archive, '@' + response_path]) + +NONE = 0 +DEBUG_INFO = 1 +DEBUG_RANGES = 2 +DEBUG_ARANGES = 3 +DEBUG_LOC = 4 +DEBUG_LINE = 5 + +tag_regex = re.compile(b'(0x[0-9a-f]+):( +)(DW_TAG_variable|DW_TAG_subprogram)') +tag_addr_regex = re.compile(b' +(DW_AT_low_pc|DW_AT_ranges|(DW_AT_location.*\(DW_OP_addr)).*') +prologue_length_regex = re.compile(b' +prologue_length: (0x[0-9a-f]+)') +end_sequence_regex = re.compile(b'(0x[0-9a-f]+): 00 DW_LNE_end_sequence') +loc_offset_regex = re.compile(b'(0x[0-9a-f]+):') +ranges_end_regex = re.compile(b'[0-9a-f]+ ') + +def analyse_dwarfdump(path, original, bindir): + dwarfdump = os.path.join(bindir, 'llvm-dwarfdump') + stdout = subprocess.check_output([dwarfdump, '--debug-info', '--debug-line', '--debug-loc', '--debug-aranges', '--verbose', path]) + info_var_offsets = [] + info_func_offsets = [] + location = NONE + in_variable = False + in_function = False + has_address = False + ranges_offsets = [] + range_starts = [] + line_offsets = [] + aranges_offsets = [] + loc_offsets = [] + + for line in stdout.splitlines(): + if line == b'.debug_info contents:': + location = DEBUG_INFO + continue + if line == b'.debug_ranges contents:': + location = DEBUG_RANGES + new_list = True + ranges_offset = 0 + continue + if line == b'.debug_loc contents:': + location = DEBUG_LOC + continue + if line == b'.debug_aranges contents:': + location = DEBUG_ARANGES + aranges_offset = 0 + continue + if line == b'.debug_line contents:': + location = DEBUG_LINE + continue + + # Split to create sections containing sequences of areas covered by + # variable/subprogram tags and those not. Nested variables are + # considered part of the subprogram. Variables and subprograms without + # a DW_AT_low_pc or DW_AT_ranges are also ignored, since these can't + # represent any actual addresses. + if location == DEBUG_INFO: + if in_variable or in_function: + if not has_address: + match = tag_addr_regex.match(line) + if match: + has_address = True + match = re.match(b'(0x[0-9a-f]+):' + b' ' * indent + b'(DW_TAG|NULL)', line) + if not match: + continue + end = int(match.group(1), 16) + if in_variable: + in_variable = False + if has_address: + info_var_offsets.append((offset, end)) + else: + in_function = False + if has_address: + info_func_offsets.append((offset, end)) + has_address = False + + if not in_variable and not in_function: + match = tag_regex.match(line) + if not match: + continue + offset = int(match.group(1), 16) + indent = len(match.group(2)) + if match.group(3) == b'DW_TAG_variable': + in_variable = True + else: + in_function = True + + # For now, split each .debug_ranges entry into its own one. + # Additionally, add an empty section at the start of each + # sequence, to hold the symbol. + # TODO: combine consecutive range entries into single section, including + # any terminator if all entries in a list are for the same symbol. 
+        if location == DEBUG_RANGES:
+            if len(line) == 0:
+                location = NONE
+                continue
+            if new_list:
+                range_starts.append(ranges_offsets)
+            if new_list and len(ranges_offsets) != 0:
+                ranges_offsets.append(ranges_offset)
+            new_list = False
+            ranges_offsets.append(ranges_offset)
+            ranges_offset += 0x10
+            if ranges_end_regex.match(line):
+                new_list = True
+
+        # Split into a header section, followed by one per entry, and a trailing
+        # section containing terminator.
+        if location == DEBUG_ARANGES:
+            if len(line) == 0:
+                location = NONE
+                continue
+            aranges_offset += 0x10
+            aranges_offsets.append(aranges_offset)  # Header and entries are all same size.
+
+        # Split each location sequence into its own section.
+        if location == DEBUG_LOC:
+            match = loc_offset_regex.match(line)
+            if not match:
+                continue
+            offset = int(match.group(1), 16)
+            # .debug_loc doesn't need a distinct header or section start symbol
+            # so don't add offset 0.
+            if offset != 0:
+                loc_offsets.append(offset)
+
+        # Split to create a header section, a section per sequence, and a trailing
+        # empty section (used for length marker).
+        if location == DEBUG_LINE:
+            if len(line_offsets) == 0:
+                match = prologue_length_regex.match(line)
+                if not match:
+                    continue
+                line_offsets.append(int(match.group(1), 16) + 10)  # Assume V4, DWARF32.
+            else:
+                match = end_sequence_regex.match(line)
+                if not match:
+                    continue
+                line_offsets.append(int(match.group(1), 16) + 3)  # 00 + length + opcode.
+
+    return (info_func_offsets, info_var_offsets, line_offsets, aranges_offsets, loc_offsets, ranges_offsets, range_starts)
+
+byte = struct.Struct('> 32
+    return r_offset, r_sym
+
+def set_reloc_offset(elf, reloc_offset, new_offset):
+    elf.seek(reloc_offset)
+    elf.write(quad.pack(new_offset))
+
+def split_relocs(elf, shdr_table, symtab, shdr, index, debug_sec_shdrs):
+    data_offset = quad.unpack(shdr[24:32])[0]
+    reloc_count = quad.unpack(shdr[32:40])[0] // 0x18
+    elf.seek(data_offset)
+    patched_shdr_i = 0
+    split_points = []
+    info = []  # The sh_info values to use.
+    symbol_shndxs = set()
+    assert debug_sec_shdrs[0][0] == 0
+    for i in range(0, reloc_count):
+        reloc_offset = data_offset + i * 0x18
+        r_offset, sym_index = get_reloc_data(elf, reloc_offset)
+
+        # The relocation could be patching either the current section, or a
+        # later one. Find the right one.
+        # If the relocation's r_offset is greater than the section's end, it is
+        # not the right section.
+        previous_shdr_index = debug_sec_shdrs[patched_shdr_i][2]
+        # Skip empty sections - the relocation can't be patching those.
+        while debug_sec_shdrs[patched_shdr_i][0] == debug_sec_shdrs[patched_shdr_i][1]:
+            patched_shdr_i += 1
+        while r_offset >= debug_sec_shdrs[patched_shdr_i][1]:
+            if debug_sec_shdrs[patched_shdr_i][0] == debug_sec_shdrs[patched_shdr_i][1]:
+                patched_shdr_i += 1
+                continue
+            if len(symbol_shndxs) == 1:
+                shndx = symbol_shndxs.pop()
+                if shndx != -1:
+                    assert shndx != 0, 'invalid section index for SHF_LINK_ORDER'
+                    set_link_order(shdr_table, previous_shdr_index, shndx)
+                symbol_shndxs = set()
+            patched_shdr_i += 1
+        assert r_offset >= debug_sec_shdrs[patched_shdr_i][0]
+        patched_shdr_index = debug_sec_shdrs[patched_shdr_i][2]
+
+        symbol = symtab.get_symbol(sym_index)
+        patched_shdr = shdr_table.get(symbol.shndx)
+        sh_flags = quad.unpack(patched_shdr[8:16])[0]
+        if symbol.type != STT_SECTION:
+            # Don't allow SHF_LINK_ORDER where the section refers to
+            # an undefined symbol.
+            if symbol.shndx == 0:
+                symbol_shndxs.add(-1)
+            else:
+                symbol_shndxs.add(symbol.shndx)
+        else:
+            if sh_flags & SHF_ALLOC:
+                # LLD doesn't like SHF_LINK_ORDER pointing at mergeable sections.
+                if sh_flags & SHF_MERGE:
+                    symbol_shndxs.add(-1)
+                else:
+                    assert symbol.shndx != 0, "sym_index = {}".format(sym_index)
+                    symbol_shndxs.add(symbol.shndx)
+
+        # Update the target offset to be relative to the new target section.
+        r_offset -= debug_sec_shdrs[patched_shdr_i][0]
+        elf.seek(reloc_offset)
+        elf.write(quad.pack(r_offset))
+
+        # We're in the right section now. If it was the same as the previous
+        # relocation, just continue. Otherwise, create the next split point.
+        if len(info) > 0 and info[-1] == patched_shdr_index:
+            continue
+        # No need to add a specific split point for the first relocation.
+        if i != 0:
+            split_points.append(i * 0x18)
+        info.append(patched_shdr_index)
+
+    # If the last relocation patches the last section, the link order won't have
+    # been set yet.
+    if len(symbol_shndxs) == 1:
+        # LLD doesn't like SHF_LINK_ORDER pointing at mergeable sections.
+        shndx = symbol_shndxs.pop()
+        if shndx != -1:
+            set_link_order(shdr_table, patched_shdr_index, shndx)
+
+    return len(split_shdr(elf, shdr_table, shdr, index, split_points, False, info)) - 1
+
+def split_debug_ranges(elf, shdr_table, symtab, ranges_shdr, ranges_shndx, rela_shdr, rela_shndx):
+    # TODO: Add symbols for section starts.
+    # This code assumes that the relocations are in offset order.
+    # Each entry will either be a pair of relocations to the same symbol, or a
+    # terminating entry with no relocations.
+    # If a range list consists solely of relocations to the same symbol, assume
+    # the terminator and whole range can be discarded with that symbol.
+    # If not, merge adjacent entries referencing the same symbol, and leave the
+    # terminator alone.
+    sh_offset = quad.unpack(ranges_shdr[24:32])[0]
+    rela_shoffset = quad.unpack(rela_shdr[24:32])[0]
+    sh_size = quad.unpack(ranges_shdr[32:40])[0]
+    reloc_count = quad.unpack(rela_shdr[32:40])[0] // 0x18
+    reloc_index = 0
+    if reloc_index != reloc_count:
+        r_offset, r_sym = get_reloc_data(elf, rela_shoffset + reloc_index * 0x18)
+    else:
+        r_offset = None
+        r_sym = None
+    ranges_split_points = []
+    reloc_split_points = []
+    list_start = 0
+    list_reloc_start = reloc_index
+    list_syms = []
+    for sh_offset in range(0, sh_size, 0x10):
+        if r_offset is None or r_offset >= sh_offset + 0x10:
+            # Terminator entry.
+            if len(list_syms) == 0:
+                # list_syms of 0 means just a terminator is in the list, so there
+                # are no relocations for the entire list.
+                ranges_split_points.append((sh_offset + 0x10, None))
+            elif len(list_syms) == 1:
+                ranges_split_points.append((sh_offset + 0x10, list_syms[0]))
+                reloc_split_points.append(reloc_index * 0x18)
+            else:
+                assert len(list_syms) == (sh_offset - list_start) // 0x10
+                ranges_split_points.extend(zip(range(list_start + 0x10, sh_offset + 1, 0x10), list_syms))
+                ranges_split_points.append((sh_offset + 0x10, None))
+                reloc_split_points.extend(range(list_reloc_start * 0x18 + 0x30, reloc_index * 0x18 + 1, 0x30))
+            list_start = sh_offset + 0x10
+            list_reloc_start = reloc_index
+            list_syms = []
+            continue
+        # Range entry.
+        if r_offset != sh_offset:
+            assert False, "unexpected relocation r_offset at offset {}".format(reloc_index * 0x18)
+        else:
+            reloc_index += 1
+            assert reloc_index != reloc_count, "incorrect number of relocations"
+            r_offset2, r_sym2 = get_reloc_data(elf, rela_shoffset + reloc_index * 0x18)
+            assert r_offset2 == sh_offset + 8, "next reloc at offset {} does not patch expected location".format(reloc_index * 0x18)
+            assert r_sym == r_sym2, "range entry at offset {} does not have common symbol".format(sh_offset)
+            symbol_shndx = symtab.get_symbol(r_sym).shndx
+            if len(list_syms) == 0:
+                list_syms = [symbol_shndx]
+            elif len(list_syms) == 1 and list_syms[0] != symbol_shndx:
+                list_syms = [list_syms[0]] * ((sh_offset - list_start) // 0x10) + [symbol_shndx]
+            elif len(list_syms) > 1:
+                list_syms.append(symbol_shndx)
+            reloc_index += 1
+            if reloc_index != reloc_count:
+                r_offset, r_sym = get_reloc_data(elf, rela_shoffset + reloc_index * 0x18)
+            else:
+                r_offset = None
+                r_sym = None
+
+    split_points, links = ([point for point, link in ranges_split_points], [link for point, link in ranges_split_points])
+    del split_points[-1]
+    new_ranges_secs = (split_shdr(elf, shdr_table, ranges_shdr, ranges_shndx, split_points, links=links, link_order=True))
+    infos = [new_ranges_secs[i][2] for i in range(0, len(links)) if links[i] is not None]
+
+    if len(reloc_split_points) > 0:
+        new_offset = 0
+        reloc_split_i = 0
+        for reloc_offset in range(rela_shoffset, reloc_split_points[-1] + rela_shoffset, 0x18):
+            if len(reloc_split_points) > reloc_split_i and reloc_offset >= (reloc_split_points[reloc_split_i] + rela_shoffset):
+                reloc_split_i += 1
+                new_offset = 0
+            set_reloc_offset(elf, reloc_offset, new_offset)
+            new_offset += 8
+
+        del reloc_split_points[-1]
+    reloc_sec_count = len(split_shdr(elf, shdr_table, rela_shdr, rela_shndx, reloc_split_points, info=infos))
+    return len(new_ranges_secs) + reloc_sec_count
+
+SHT_PROGBITS = 1
+SHT_SYMTAB = 2
+SHT_RELA = 4
+SHT_SYMTAB_SHNDX = 18
+
+SHN_XINDEX = 0xffff
+
+def fragment(path, original, bindir):
+    start_time = time.time()
+
+    (func_offsets, var_offsets, line_offsets, aranges_offsets, loc_offsets, ranges_offsets, range_start_offsets) = analyse_dwarfdump(path, original, bindir)
+    print("dwarfdump analysis time: %f" % (time.time() - start_time))
+    debug_indexes = {}
+    rela_debug_indexes = {}
+    text_indexes = {}
+    data_indexes = {}
+
+    additional_syms = 0
+    need_info_length_reloc = len(func_offsets) + len(var_offsets) > 0
+    if need_info_length_reloc:
+        additional_syms += 1
+    need_line_length_reloc = len(line_offsets) > 0
+    if need_line_length_reloc:
+        additional_syms += 1
+    need_aranges_length_reloc = len(aranges_offsets) > 0
+    if need_aranges_length_reloc:
+        additional_syms += 1
+    additional_syms += len(loc_offsets) + 1
+    additional_syms += len(range_start_offsets)
+
+    with open(path, 'r+b') as elf:
+        elf.seek(40)
+        shoff = quad.unpack(elf.read(8))[0]
+        assert shoff != 0, 'object with no shdrs is unsupported'
+        elf.seek(60)
+        shnum = half.unpack(elf.read(2))[0]
+        shstrndx = half.unpack(elf.read(2))[0]
+
+        ## e_shnum == 0 means the first section header sh_size contains the real
+        ## number of sections.
+        if shnum == 0:
+            elf.seek(shoff + 32)
+            shnum = quad.unpack(elf.read(8))[0]
+        elf.seek(shoff)
+        shdr_table = SectionHeaderTable(elf.read(shnum * 64), shnum)
+
+        ## e_shstrndx == SHN_XINDEX means the first section header sh_link
+        ## contains the real index.
+        if shstrndx == SHN_XINDEX:
+            shstrndx = word.unpack(shdr_table.get(0)[40:44])[0]
+        strtab_shdr = shdr_table.get(shstrndx)
+        strtab = StringTable(elf, quad.unpack(strtab_shdr[24:32])[0])
+        symtab_shndx = None
+
+        start_time = time.time()
+        for i in range(0, shnum):
+            shdr = shdr_table.get(i)
+            sh_name = word.unpack(shdr[:4])[0]
+            sh_type = word.unpack(shdr[4:8])[0]
+            name = strtab.get_string(sh_name)
+            if sh_type == SHT_PROGBITS and name.startswith(b'.debug'):
+                assert name not in debug_indexes
+                debug_indexes[name] = {}
+                debug_indexes[name]['index'] = i
+                debug_indexes[name]['shdr'] = shdr
+            elif sh_type == SHT_RELA and name.startswith(b'.rela.debug'):
+                assert name not in rela_debug_indexes
+                rela_debug_indexes[name] = {}
+                rela_debug_indexes[name]['index'] = i
+                rela_debug_indexes[name]['shdr'] = shdr
+            elif sh_type == SHT_SYMTAB:
+                sh_offset = quad.unpack(shdr[24:32])[0]
+                sh_link = word.unpack(shdr[40:44])[0]
+                if sh_link == shstrndx:
+                    sym_strtab = strtab
+                else:
+                    symtab_strtab_shdr = shdr_table.get(sh_link)
+                    sym_strtab = StringTable(elf, quad.unpack(symtab_strtab_shdr[24:32])[0])
+                symtab = SymbolTable(elf, sh_offset, sym_strtab)
+            elif sh_type == SHT_SYMTAB_SHNDX:
+                symtab_shndx = SymtabShndxTable(elf, quad.unpack(shdr[24:32])[0])
+
+        print("shdr read time: %f" % (time.time() - start_time))
+
+        if len(debug_indexes) == 0:
+            # No debug data, so nothing to do here.
+            return
+
+        if symtab_shndx is not None:
+            symtab.symtab_shndx = symtab_shndx
+
+        start_time = time.time()
+        if b'.debug_info' in debug_indexes:
+            splits = make_debug_info_split_points(func_offsets, var_offsets)
+            debug_info_secs = split_shdr(elf, shdr_table, debug_indexes[b'.debug_info']['shdr'], debug_indexes[b'.debug_info']['index'], splits)
+            shnum += len(debug_info_secs) - 1
+            if b'.rela.debug_info' in rela_debug_indexes:
+                shnum += split_relocs(elf, shdr_table, symtab, rela_debug_indexes[b'.rela.debug_info']['shdr'], rela_debug_indexes[b'.rela.debug_info']['index'], debug_info_secs) - 1
+            # TODO: Add relocation + symbol for length
+        if b'.debug_line' in debug_indexes:
+            debug_line_secs = split_shdr(elf, shdr_table, debug_indexes[b'.debug_line']['shdr'], debug_indexes[b'.debug_line']['index'], line_offsets)
+            shnum += len(debug_line_secs) - 1
+            if b'.rela.debug_line' in rela_debug_indexes:
+                shnum += split_relocs(elf, shdr_table, symtab, rela_debug_indexes[b'.rela.debug_line']['shdr'], rela_debug_indexes[b'.rela.debug_line']['index'], debug_line_secs) - 1
+            # TODO: Add relocation + symbol for length
+        if b'.debug_aranges' in debug_indexes:
+            debug_aranges_secs = split_shdr(elf, shdr_table, debug_indexes[b'.debug_aranges']['shdr'], debug_indexes[b'.debug_aranges']['index'], aranges_offsets)
+            shnum += len(debug_aranges_secs) - 1
+            if b'.rela.debug_aranges' in rela_debug_indexes:
+                shnum += split_relocs(elf, shdr_table, symtab, rela_debug_indexes[b'.rela.debug_aranges']['shdr'], rela_debug_indexes[b'.rela.debug_aranges']['index'], debug_aranges_secs) - 1
+            # TODO: Add relocation + symbol for length
+        if b'.debug_loc' in debug_indexes:
+            debug_loc_secs = split_shdr(elf, shdr_table, debug_indexes[b'.debug_loc']['shdr'], debug_indexes[b'.debug_loc']['index'], loc_offsets)
+            shnum += len(debug_loc_secs) - 1
+            if b'.rela.debug_loc' in rela_debug_indexes:
+                shnum += split_relocs(elf, shdr_table, symtab, rela_debug_indexes[b'.rela.debug_loc']['shdr'], rela_debug_indexes[b'.rela.debug_loc']['index'], debug_loc_secs) - 1
+            # TODO: Add symbols for each section.
+        if b'.debug_ranges' in debug_indexes and b'.rela.debug_ranges' in rela_debug_indexes:
+            shnum += split_debug_ranges(elf,
+                                        shdr_table,
+                                        symtab,
+                                        debug_indexes[b'.debug_ranges']['shdr'],
+                                        debug_indexes[b'.debug_ranges']['index'],
+                                        rela_debug_indexes[b'.rela.debug_ranges']['shdr'],
+                                        rela_debug_indexes[b'.rela.debug_ranges']['index'])
+        print("split debug time: %f" % (time.time() - start_time))
+
+        shdr_table.write(elf, shoff)
+
+def copy_and_fragment(path, root, outdir, tmpdir, force_split, force_archive_split, bindir):
+    """
+    Copy the file at path to outdir, fragmenting it either en route, or once
+    at the destination, using tools located in bindir. The output file will
+    be placed in outdir in the same relative structure as path is in root.
+    """
+    success_file = os.path.normpath(path) + '.success'
+    if force_split and os.path.exists(success_file):
+        os.remove(success_file)
+    dest = os.path.join(outdir, os.path.relpath(path, root))
+    kind = get_file_kind(path)
+    if os.path.exists(success_file) and os.path.exists(dest) and not (force_archive_split and kind == ARCHIVE_KIND):
+        return
+
+    if kind == NORMAL_KIND:
+        copy_to_dir(path, root, outdir)
+    elif kind == ARCHIVE_KIND:
+        if tmpdir is None:
+            tmpdir = tempfile.mkdtemp()
+        elif os.path.exists(tmpdir):
+            shutil.rmtree(tmpdir)
+            os.makedirs(tmpdir)
+        else:
+            os.makedirs(tmpdir)
+        archive = copy_to_dir(path, root, tmpdir)
+        members = extract_members(archive, bindir)
+        for member in members:
+            fragment(member, path + '.' + os.path.basename(member), bindir)
+        replace_archive(members, archive, tmpdir, bindir)
+        copy_to_dir(archive, tmpdir, outdir)
+
+    else:
+        obj = copy_to_dir(path, root, outdir)
+        fragment(obj, path, bindir)
+
+    with open(success_file, 'w') as success:
+        json.dump(path, success)
+
+def main():
+    start = time.time()
+    args = parse_commandline()
+    files = find_files(args.input)
+    if args.force_split:
+        shutil.rmtree(args.outdir)
+    i = 1
+    if not os.path.exists(args.outdir):
+        os.makedirs(args.outdir)
+    for f in files:
+        print("Fragmenting %s [%d/%d]" % (f, i, len(files)))
+        copy_and_fragment(f, args.input, args.outdir, args.tmpdir, args.force_split, args.force_archive_split, args.bin)
+        i += 1
+    print("Total time: %f" % (time.time() - start))
+
+if __name__ == '__main__':
+    main()
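
As a usage sketch (not part of the patch): the new utility can be driven over a build tree roughly as follows, using only the options its parse_commandline() defines; the interpreter, tool, and input paths below are placeholders.

# Hypothetical driver: copy and fragment everything under build/obj into
# outdir, using llvm-ar/llvm-dwarfdump from a local LLVM bin directory.
import subprocess

subprocess.check_call([
    'python', 'llvm/utils/fragment_package.py',
    '--bin', '/path/to/llvm/bin',   # directory containing the required tools
    '--outdir', 'outdir',           # root directory of the output package
    'build/obj',                    # directory or file to copy and fragment
])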