Index: utils/sync-source/README.txt =================================================================== --- /dev/null +++ utils/sync-source/README.txt @@ -0,0 +1,293 @@ +sync-source.py + +OVERVIEW + +The sync-source.py utility transfers groups of files between +computers. The primary use case is to enable developing LLVM project +software on one machine, transferring it efficiently to other machines --- +possibly of other architectures --- and testing it there. sync-source.py +supports configurable, named source-to-destination mappings and has a +transfer agent plug-in architecture. The current distribution provides +an rsync-over-ssh transfer agent. + +The primary benefits of using sync-source.py are: + +* Provides a simple, reliable way to get a mirror copy of primary- + machine files onto several different destinations without concern + of compromising the patch during testing on different machines. + +* Handles directory-mapping differences between two machines. For + LLDB, this is helpful when going between OS X and any other non-OS X + target system. + +EXAMPLE WORKFLOW + +This utility was developed in the context of working on the LLDB +project. Below we show the transfers we'd like to have happen, +and the configuration that supports it. + +Workflow Example: + +* Develop on OS X (primary machine) +* Test candidate changes on OS X. +* Test candidate changes on a Linux machine (machine-name: lldb-linux). +* Test candidate changes on a FreeBSD machine (machine-name: lldb-freebsd). +* Do check-ins from OS X machine. + +Requirements: + +* OS X machine requires the lldb source layout: lldb, lldb/llvm, + lldb/llvm/tools/clang. Note this is different than the canonical + llvm, llvm/tools/clang, llvm/tools/lldb layout that we'll want on + the Linux and FreeBSD test machines. + +* Linux machine requires the llvm, llvm/tools/clang and + llvm/tools/lldb layout. + +* FreeBSD machine requires the same layout as the Linux machine. 
+ +sync-source.py configuration in ~/.sync-sourcerc: + +# This is my configuration with a comment. Configuration +# files are JSON-based. +{ "configurations": [ + # Here we have a collection of named configuration blocks. + # Configuration blocks can chain back to a parent by name. + { + # Every block has a name so it can be referenced from + # the command line or chained back to by a child block + # for sharing. + "name": "base_tot_settings", + + # This directive lists the "directory ids" that we'll care + # about. If your local repository has additional directories + # for other projects that need to ride along, add them here. + # For defaulting purposes, it makes sense to name the + # directory IDs as the most likely name for the directory + # itself. For stock LLDB from top of tree, we generally only + # care about lldb, llvm and clang. + "dir_names": [ "llvm", "clang", "lldb" ], + + # This section describes where the source files live on + # the primary machine. There should always be a base_dir + # entry, which indicates where in the local filesystem the + # projects are rooted. For each dir in dir_names, there + # should be either: + # 1. an entry named {dir-id}_dir (e.g. llvm_dir), which + # specifies the source directory for the given dir id + # relative to the base_dir entry, OR + # 2. no entry, in which case the directory is assumed to + # be the same as {dir-id}. In the example below, the + # base_dir-relative directory for the "lldb" dir-id is + # defaulted to being "lldb". That's exactly what + # we need in an OS X-style lldb dir layout. + "source": { + "base_dir": "~/work/lldb-tot", + "llvm_dir": "lldb/llvm", + "clang_dir": "lldb/llvm/tools/clang" + }, + + # source_excludes covers any exclusions that: + # * should be applied when copying files from the source + # * should be excluded from deletion on the destination + # + # By default, ".git", ".svn" and ".pyc" are added to + # all dir-id exclusions. 
The default excludes can be + # controlled by the sync-source.py --default-excludes + # option. + # + # Below, I have transfer of the lldb dir skip everything + # rooted at "/llvm" below the lldb dir. This is + # because we want the source OS X lldb to move to + # a destination of {some-dest-root}/llvm/tools/lldb, and + # not have the OS-X-inverted llvm copy over with the lldb + # transfer portion. We'll see the complete picture of + # how this works when we get to specifying destinations + # later on in the config. + # + # We also exclude the "/build" and "/llvm-build" dir rooted in + # the OS X-side sources. The Xcode configuration on this + # OS X machine will dump lldb builds in the /build directory + # relative to the lldb dir, and it will build llvm+clang in + # the /llvm-build dir relative to the lldb dir. + # + # Note the first forward slash in "/build" indicates to the + # transfer agent that we only want to exclude the + # ~/work/lldb-tot/lldb/build dir, not just any file or + # directory named "build" somewhere underneath the lldb + # directory. Without the leading forward slash, any file + # or directory called build anywhere underneath the lldb dir + # will be excluded, which is definitely not what we want here. + # + # For the llvm dir, we do a source-side exclude for + # "/tools/clang". We manage the clang transfer as a separate + # entity, so we don't want the movement of llvm to also move + # clang. + # + # The llvm_dir exclusion of "/tools/lldb" is the first example + # of an exclude targeting a requirement of the destination + # side. Normally the transfer agent will delete anything on + # the destination that is not present on the source. It is + # trying to mirror, and ensure both sides have the same + # content. The source side of llvm on OS X does not have a + # "/tools/lldb", so at first this exclude looks nonsensical. + # But on the canonical destination layout, lldb lives in + # {some-dest-root}/llvm/tools/lldb. 
Without this exclude, + # the transfer agent would blow away the tools/lldb directory + # on the destination every time we transfer, and then have to + # copy the lldb dir all over again. For rsync+ssh, that + # would totally defeat the huge transfer efficiencies gained + # by using rsync in the first place. + # + # Note the overloading of both source and dest style excludes + # ultimately comes from the rsync-style exclude mechanism. + # If it weren't for that, I would have separated source and + # dest excludes out better. + "source_excludes": { + "lldb_dir": ["/llvm", "/build", "/llvm-build"], + "llvm_dir": ["/tools/lldb", "/tools/clang"] + } + }, + + # Top of tree public, common settings for all destinations. + { + # The name for this config block. + "name": "common_tot", + + # Here is our first chaining back to a parent config block. + # Any settings in "common_tot" not specified here are going + # to be retrieved from the parent. + "parent": "base_tot_settings", + + # The transfer agent class to use. Right now, the only one + # available is this one here that uses rsync over ssh. + # If some other mechanism is needed to reach this destination, + # it can be specified here in full [[package.]module.]class form. + "transfer_class": "transfer.rsync.RsyncOverSsh", + + # Specifies the destination-root-relative directories. + # Here our destination is rooted at: + # {some-yet-to-be-specified-destination-root} + "base_dir". + # In other words, each destination will have some kind of root + # for all relative file placement. We'll see those defined + # later, as they can change per destination machine. + # The block below describes the settings relative to that + # destination root. + # + # As before, each dir-id used in this configuration is + # expected to have either: + # 1. an entry named {dir-id}_dir (e.g. llvm_dir), which + # specifies the destination directory for the given dir id + # relative to the dest_root+base_dir entries, OR + # 2. 
no entry, in which case the directory is assumed to + # be the same as {dir-id}. In the example below, the + # dest_root+base_dir-relative directory for the "llvm" dir-id is + # defaulted to being "llvm". That's exactly what + # we need in a canonical llvm/clang/lldb setup on + # Linux/FreeBSD. + # + # Note we see the untangling of the OS X lldb-centric + # directory structure to the canonical llvm, + # llvm/tools/clang, llvm/tools/lldb structure below. + # We are mapping lldb into a subdirectory of the llvm + # directory. + # + # The transfer logic figures out which directories to copy + # first by finding the shortest destination absolute path + # and doing them in that order. For our case, this ensures + # llvm is copied over before lldb or clang. + "dest": { + "base_dir": "work/mirror/git", + "lldb_dir": "llvm/tools/lldb", + "clang_dir": "llvm/tools/clang" + } + }, + + # Describe the lldb-linux destination. With this, + # we're done with the mapping for transfer setup + # for the lldb-linux box. This configuration can + # be used either by: + # 1. having a parent "default" block that points to this one, + # which then gets used by default, or + # 2. using the --config-name/-c CONFIG option to + # specify using this name on the sync-source.py command line. + { + "name": "lldb-linux", + "parent": "common_tot", + + # The ssh block is understood by the rsync+ssh transfer + # agent. Other agents would probably require different + # agent-specific details that they could read from + # other blocks. + "ssh": { + # This specifies the host name (or IP address) as would + # be used as the target for an ssh command. + "dest_host": "lldb-linux.example.com", + + # root_dir specifies the global root directory for + # this destination. All destinations on this target + # will be in a directory that is built from + # root_dir + base_dir + {dir_id}_dir. + "root_dir" : "/home/tfiala", + + # The ssh user is specified here. + "user": "tfiala", + + # The ssh port is specified here. 
+ "port": 22 + } + }, + + # Describe the lldb-freebsd destination. + # Very similar to the lldb-linux one. + { + "name": "lldb-freebsd", + "parent": "common_tot", + "ssh": { + "dest_host": "lldb-freebsd.example.com", + # Specify a different destination-specific root dir here. + "root_dir" : "/mnt/ssd02/fialato", + "user": "fialato", + # The ssh port is specified here. + "port": 2022 + } + }, + + # If a block named "default" exists, and if no configuration + # is specified on the command line, then the default block + # will be used. Use this block to point to the most common + # transfer destination you would use. + { + "name": "default", + "parent": "lldb-linux" + } +] +} + +Using it + +Now that we have a .sync-sourcerc file set up, we can do a transfer. +The .sync-sourcerc file will be searched for as follows, using the +first one that is found: + +* First check the --rc-file RCFILE option. If this is specified + and doesn't exist, it will raise an error and quit. + +* Check if the current directory has a .sync-sourcerc file. If so, + use that. + +* Use the .sync-sourcerc file from the user's home directory. + +Run the command: +python /path/to/sync-source.py -c {configuration-name} + +The -c {configuration-name} can be left off, in which case a +configuration with the name 'default' will be used. + +After that, the transfer will occur. With the rsync-over-ssh +transfer agent, one rsync per dir-id will be used. rsync output +is redirected to the console. + +FEEDBACK + +Feel free to pass feedback along to Todd Fiala (todd.fiala@gmail.com). 
"""Transfer-agent plug-ins used by sync-source.py.

This span carries three small modules from utils/sync-source/lib/transfer:

* protocol.py      -- Protocol, the base class every transfer agent extends.
* rsync.py         -- RsyncOverSsh, the rsync-over-ssh transfer agent.
* transfer_spec.py -- TransferSpec, one source -> destination directory job.
"""

import os.path
import pprint
import subprocess
import sys


# ---------------------------------------------------------------------------
# lib/transfer/protocol.py
# ---------------------------------------------------------------------------

class Protocol(object):
    """Base class for transfer agents.

    @param options the parsed command-line options object.
    @param config the chained configuration object for this run.
    """

    def __init__(self, options, config):
        self.options = options
        self.config = config

    def transfer(self, transfer_specs, dry_run):
        """Transfer every spec in transfer_specs; subclasses must override.

        @param transfer_specs iterable of TransferSpec instances.
        @param dry_run when True, report what would happen without copying.
        @raise NotImplementedError always, in this base class.
        """
        # The original omitted 'self' and raised a plain string, which is
        # itself a TypeError on Python >= 2.6.
        raise NotImplementedError(
            "transfer must be overridden by transfer implementation")


# ---------------------------------------------------------------------------
# lib/transfer/rsync.py
# (When kept as a separate module, the base class below is
# transfer.protocol.Protocol.)
# ---------------------------------------------------------------------------

class RsyncOverSsh(Protocol):
    """Transfer agent that mirrors each directory with rsync over ssh.

    Reads the "ssh" configuration block, which must provide "root_dir",
    "user" and "dest_host"; "port" defaults to 22 when absent.
    """

    def __init__(self, options, config):
        super(RsyncOverSsh, self).__init__(options, config)
        self.ssh_config = config.get_value("ssh")
        if self.ssh_config is None:
            # Fail here with a clear message rather than later with an
            # opaque "NoneType is not subscriptable".
            raise ValueError(
                "configuration has no 'ssh' block required by RsyncOverSsh")

    def build_rsync_command(self, transfer_spec, dry_run):
        """Return the rsync argv list for a single transfer spec.

        @param transfer_spec the TransferSpec describing source path,
        exclude patterns and root-relative destination path.
        @param dry_run when True, adds rsync's -n (no-change) flag.
        @return list of strings suitable for subprocess.call().
        """
        dest_path = os.path.join(
            self.ssh_config["root_dir"],
            transfer_spec.dest_path)

        flags = "-avzn" if dry_run else "-avz"
        command = [
            "rsync",
            flags,
            "-e",
            "ssh -p {}".format(self.ssh_config.get("port", 22)),
            "--rsync-path",
            # Runs on the destination before rsync starts so that the
            # target directory exists.  NOTE(review): dest_path is not
            # shell-quoted, so paths with spaces will break; quoting it
            # blindly would also stop '~' expansion on the remote side.
            "mkdir -p {} && rsync".format(dest_path),
        ]

        # Excludes both skip files on the source side and protect
        # matching paths from --delete on the destination side.
        for exclude_path in transfer_spec.exclude_paths or []:
            command.extend(["--exclude", exclude_path])

        command.extend([
            "--delete",
            # Trailing slash: copy the directory's contents, not the
            # directory itself.
            transfer_spec.source_path + "/",
            "{}@{}:{}".format(
                self.ssh_config["user"],
                self.ssh_config["dest_host"],
                dest_path),
        ])
        return command

    def transfer(self, transfer_specs, dry_run):
        """Run one rsync per transfer spec, stopping at the first failure.

        @param transfer_specs iterable of TransferSpec instances.
        @param dry_run passed through to build_rsync_command().
        @return 0 when every rsync succeeded, otherwise the exit status
        of the first rsync that failed.
        """
        verbose = self.options.verbose
        if verbose:
            printer = pprint.PrettyPrinter()
            for spec in transfer_specs:
                printer.pprint(spec)

        for spec in transfer_specs:
            command = self.build_rsync_command(spec, dry_run)
            if verbose:
                sys.stdout.write(
                    "executing the following command:\n{}\n".format(command))
            result = subprocess.call(
                command, stdin=sys.stdin, stdout=sys.stdout,
                stderr=sys.stderr)
            if result != 0:
                return result
        # Always return an int so callers can use it as an exit status.
        return 0


# ---------------------------------------------------------------------------
# lib/transfer/transfer_spec.py
# ---------------------------------------------------------------------------

class TransferSpec(object):
    """Describes one directory transfer.

    @param source_path absolute source directory on the local machine.
    @param exclude_paths list of exclude patterns for this directory.
    @param dest_path destination directory, relative to the agent's root.
    """

    def __init__(self, source_path, exclude_paths, dest_path):
        self.source_path = source_path
        self.exclude_paths = exclude_paths
        self.dest_path = dest_path

    def __repr__(self):
        # {!r} quotes strings but leaves the exclude list unquoted.
        return (
            "TransferSpec(source_path={!r}, exclude_paths={!r}, "
            "dest_path={!r})").format(
                self.source_path, self.exclude_paths, self.dest_path)


# ---------------------------------------------------------------------------
# utils/sync-source/pylintrc (not Python; content preserved as-is)
# ---------------------------------------------------------------------------
# [Master]
# init-hook='import os; import sys; sys.path.append(os.path.join(os.getcwd(), "lib")); print("hello from {}".format(os.getcwd()))'
#!/usr/bin/env python
"""
                     The LLVM Compiler Infrastructure

This file is distributed under the University of Illinois Open Source
License. See LICENSE.TXT for details.

Sync lldb and related source from a local machine to a remote machine.

This facilitates working on the lldb sourcecode on multiple machines
and multiple OS types, verifying changes across all.
"""

import argparse
import importlib
import json
import os.path
import sys

# Add the local lib directory to the python path so that transfer
# agents (e.g. transfer.rsync.RsyncOverSsh) can be imported by name.
LOCAL_LIB_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "lib")
sys.path.append(LOCAL_LIB_PATH)


DOTRC_BASE_FILENAME = ".sync-sourcerc"


class SyncSourceError(Exception):
    """Raised for rc-file or configuration problems the user can fix."""


class Configuration(object):
    """Provides chaining configuration lookup."""

    def __init__(self, rcdata_configs):
        # Ordered from the most derived (child) block to the root parent.
        self.__rcdata_configs = rcdata_configs

    def get_value(self, key):
        """
        Return the first value in the parent chain that has the key.

        The traversal starts from the most derived configuration (i.e.
        child) and works all the way up the parent chain.

        @return the value of the first key in the parent chain that
        contains a value for the given key, or None if no block has it.
        """
        for config in self.__rcdata_configs:
            if key in config:
                return config[key]
        return None

    def __getitem__(self, key):
        """
        Like get_value(), but raise KeyError when no block has the key.

        Presence is tested with 'in' rather than truthiness, so values
        such as 0, False or an empty list are returned, not reported
        as missing.
        """
        for config in self.__rcdata_configs:
            if key in config:
                return config[key]
        raise KeyError(key)


def parse_args(args=None):
    """
    @param args optional argument list; defaults to sys.argv[1:].
    @return options parsed from the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config-name", "-c", action="store", default="default",
        help="specify configuration name to use")
    parser.add_argument(
        "--default-excludes", action="store", default="*.git,*.svn,*.pyc",
        help=("comma-separated list of default file patterns to exclude "
              "from each source directory and to protect from deletion "
              "on each destination directory; if starting with forward "
              "slash, it only matches at the top of the base directory"))
    parser.add_argument(
        "--dry-run", "-n", action="store_true",
        help="do a dry run of the transfer operation, don't really transfer")
    parser.add_argument(
        "--rc-file", "-r", action="store",
        help="specify the sync-source rc file to use for configurations")
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="turn on verbose output")
    return parser.parse_args(args)


def _strip_comment(line):
    """Return line with any '#' comment removed, keeping its newline.

    A '#' inside a double-quoted JSON string is data, not a comment, so
    string state (including backslash escapes) is tracked while scanning.
    JSON strings cannot span lines, so per-line tracking is sufficient.
    """
    in_string = False
    escaped = False
    for index, char in enumerate(line):
        if in_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == "#":
            newline = "\n" if line.endswith("\n") else ""
            return line[:index] + newline
    return line


def read_rcfile(filename):
    """Returns the json-parsed contents of the input file.

    '#' comments are removed first; line breaks are preserved so that
    line numbers in JSON error messages still match the file.
    """
    with open(filename, "r") as json_file:
        stripped = "".join(_strip_comment(line) for line in json_file)
    return json.loads(stripped)


def find_appropriate_rcfile(options):
    """Locate the rc file to use.

    Search order: the --rc-file option, then ./.sync-sourcerc, then
    ~/.sync-sourcerc.

    @return the rc file path, or None if no candidate exists.
    @raise SyncSourceError if --rc-file names a file that doesn't exist.
    """
    # Use an options-specified rcfile if specified.
    if options.rc_file:
        if not os.path.isfile(options.rc_file):
            raise SyncSourceError(
                "rcfile '{}' specified but doesn't exist".format(
                    options.rc_file))
        return options.rc_file

    # Fall back to the current directory, then the home directory.
    candidates = (
        os.path.abspath(DOTRC_BASE_FILENAME),
        os.path.abspath(
            os.path.join(os.path.expanduser("~"), DOTRC_BASE_FILENAME)),
    )
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate

    # Nothing matched.  We don't have an rc filename candidate.
    return None


def get_configuration(options, rcdata, config_name):
    """Build the Configuration chain that starts at config_name.

    @param options parsed command-line options (currently unused).
    @param rcdata parsed rc-file contents with a "configurations" list.
    @param config_name name of the most derived block to start from.
    @return a Configuration ordered child-first, parents after.
    @raise SyncSourceError if a named block is missing or the parent
    chain loops back on itself.
    """
    # Index blocks by name; the first block with a given name wins.
    configs_by_name = {}
    for config in rcdata.get("configurations", []):
        configs_by_name.setdefault(config.get("name"), config)

    rcdata_configs = []
    visited = set()
    next_config_name = config_name
    while next_config_name:
        if next_config_name in visited:
            raise SyncSourceError(
                "configuration '{}' is its own ancestor".format(
                    next_config_name))
        visited.add(next_config_name)

        rcdata_config = configs_by_name.get(next_config_name)
        if rcdata_config is None:
            raise SyncSourceError(
                "failed to find specified config '{}'".format(
                    next_config_name))

        rcdata_configs.append(rcdata_config)
        next_config_name = rcdata_config.get("parent")
    return Configuration(rcdata_configs)


def create_transfer_agent(options, configuration):
    """Instantiate the transfer agent named by "transfer_class".

    @return an instance of the configured class, constructed with
    (options, configuration).
    @raise SyncSourceError if the class is unspecified, is not in
    [[package.]module.]class form, or cannot be loaded.
    """
    transfer_class_spec = configuration.get_value("transfer_class")
    if options.verbose:
        sys.stdout.write(
            "specified transfer class: '{}'\n".format(transfer_class_spec))

    if not transfer_class_spec or "." not in transfer_class_spec:
        raise SyncSourceError(
            "transfer_class must be in [[package.]module.]class form, "
            "got '{}'".format(transfer_class_spec))

    # Load the module (possibly package-qualified), then the class.
    module_name, class_name = transfer_class_spec.rsplit(".", 1)
    try:
        module = importlib.import_module(module_name)
        clazz = getattr(module, class_name)
    except (ImportError, AttributeError) as error:
        raise SyncSourceError(
            "failed to load transfer class '{}': {}".format(
                transfer_class_spec, error))
    return clazz(options, configuration)


def _build_transfer_paths(configuration, default_excludes):
    """Return (source_dir, excludes, dest_dir) tuples, one per dir id.

    The result is ordered by destination path length, shortest first,
    so a parent destination (e.g. llvm) is synced before directories
    nested inside it (e.g. llvm/tools/lldb).
    """
    source_dirs = configuration.get_value("source")
    dest_dirs = configuration.get_value("dest")
    dir_ids = configuration.get_value("dir_names")
    source_excludes = configuration.get_value("source_excludes") or {}

    required = (
        ("source", source_dirs), ("dest", dest_dirs), ("dir_names", dir_ids))
    for key, value in required:
        if value is None:
            raise SyncSourceError(
                "configuration is missing required '{}' entry".format(key))
    for key, section in (("source", source_dirs), ("dest", dest_dirs)):
        if "base_dir" not in section:
            raise SyncSourceError(
                "configuration '{}' block has no 'base_dir'".format(key))

    source_base_dir = source_dirs["base_dir"]
    dest_base_dir = dest_dirs["base_dir"]

    transfer_paths = []
    for dir_id in dir_ids:
        dir_key = "{}_dir".format(dir_id)

        # Absolute source dir; the base-relative part defaults to the
        # dir id itself (e.g. lldb).
        source_dir = os.path.expanduser(
            os.path.join(source_base_dir, source_dirs.get(dir_key, dir_id)))

        # Excludes stop items from being copied on the source side and
        # protect items from deletion on the dest side.  A leading '/'
        # roots the pattern at the directory being transferred.
        excludes = list(default_excludes)
        excludes.extend(source_excludes.get(dir_key, []))

        # Destination-base-relative dest dir; defaults to the dir id.
        dest_dir = os.path.join(
            dest_base_dir, dest_dirs.get(dir_key, dir_id))

        transfer_paths.append((source_dir, excludes, dest_dir))

    # list.sort is stable, so equal-length destinations keep the order
    # given in dir_names.
    transfer_paths.sort(key=lambda paths: len(paths[2]))
    return transfer_paths


def sync_configured_sources(options, configuration, default_excludes):
    """Transfer every configured directory with the configured agent.

    @param options parsed command-line options (dry_run, verbose).
    @param configuration the Configuration chain to read from.
    @param default_excludes exclude patterns applied to every dir id.
    @return whatever the transfer agent's transfer() returns.
    @raise SyncSourceError on incomplete configuration or when there is
    nothing to transfer.
    """
    # Imported here rather than at module level: it lives under
    # LOCAL_LIB_PATH and is only needed once a transfer is requested.
    import transfer.transfer_spec

    transfer_agent = create_transfer_agent(options, configuration)

    transfer_specs = [
        transfer.transfer_spec.TransferSpec(source_dir, excludes, dest_dir)
        for source_dir, excludes, dest_dir
        in _build_transfer_paths(configuration, default_excludes)]

    if not transfer_specs:
        raise SyncSourceError("nothing to transfer, bad configuration?")
    return transfer_agent.transfer(transfer_specs, options.dry_run)


def main():
    """Drives the main program.

    @return the process exit status: 0 on success, 1 when no usable
    rc file is found, 2 on configuration errors, otherwise the transfer
    agent's non-zero result.
    """
    options = parse_args()

    # Drop empty patterns such as those produced by "a,,b" or "".
    default_excludes = [
        pattern
        for pattern in (options.default_excludes or "").split(",")
        if pattern]

    # Locate the rc filename to load, then load it.
    try:
        rc_filename = find_appropriate_rcfile(options)
    except SyncSourceError as error:
        sys.stderr.write("{}\n".format(error))
        return 1
    if not rc_filename:
        sys.stderr.write("no rcfile specified, cannot guess configuration\n")
        return 1
    if options.verbose:
        sys.stdout.write(
            "reading rc data from file '{}'\n".format(rc_filename))
    rcdata = read_rcfile(rc_filename)

    # Find the configuration and kick off the transfer.
    try:
        configuration = get_configuration(
            options, rcdata, options.config_name)
        result = sync_configured_sources(
            options, configuration, default_excludes)
    except SyncSourceError as error:
        sys.stderr.write("{}\n".format(error))
        return 2

    # Agents may return None on success; normalize to an exit status.
    return result or 0


if __name__ == "__main__":
    sys.exit(main())