Index: utils/docker/build_docker_image.sh
===================================================================
--- utils/docker/build_docker_image.sh
+++ utils/docker/build_docker_image.sh
@@ -38,6 +38,9 @@
                        Can be specified multiple times.
   -i|--install-target  name of a cmake install target to build and include in
                        the resulting archive. Can be specified multiple times.
+  -c|--checksums       name of a file containing checksums of the llvm checkout.
+                       The script will fail if the checksums of the checkout
+                       do not match.
 
 Required options: --source and --docker-repository, at least one
   --install-target.
@@ -66,6 +69,7 @@
 EOF
 }
 
+CHECKSUMS_FILE=""
 SEEN_INSTALL_TARGET=0
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -95,6 +99,11 @@
       BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS $1 $2"
       shift 2
       ;;
+    -c|--checksums)
+      shift
+      CHECKSUMS_FILE="$1"
+      shift
+      ;;
     --)
       shift
       BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -- $*"
@@ -141,6 +150,11 @@
 cp -r "$SOURCE_DIR/$IMAGE_SOURCE" "$BUILD_DIR/$IMAGE_SOURCE"
 cp -r "$SOURCE_DIR/scripts" "$BUILD_DIR/scripts"
 
+mkdir "$BUILD_DIR/checksums"
+if [ "$CHECKSUMS_FILE" != "" ]; then
+  cp "$CHECKSUMS_FILE" "$BUILD_DIR/checksums/checksums.txt"
+fi
+
 if [ "$DOCKER_TAG" != "" ]; then
   DOCKER_TAG=":$DOCKER_TAG"
 fi
Index: utils/docker/debian8/build/Dockerfile
===================================================================
--- utils/docker/debian8/build/Dockerfile
+++ utils/docker/debian8/build/Dockerfile
@@ -19,7 +19,7 @@
 # Install compiler, python and subversion.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ca-certificates gnupg \
-        build-essential python2.7 wget subversion ninja-build && \
+        build-essential python wget subversion ninja-build && \
     rm -rf /var/lib/apt/lists/*
 
 # Import public key required for verifying signature of cmake download.
@@ -37,9 +37,11 @@
     tar xzf cmake-3.7.2-Linux-x86_64.tar.gz -C /usr/local --strip-components=1 && \
     cd / && rm -rf /tmp/cmake-install
 
+ADD checksums /tmp/checksums
+ADD scripts /tmp/scripts
+
 # Arguments passed to build_install_clang.sh.
 ARG buildscript_args
 
 # Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-ADD scripts/build_install_llvm.sh /tmp
-RUN /tmp/build_install_llvm.sh ${buildscript_args}
+RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
Index: utils/docker/example/build/Dockerfile
===================================================================
--- utils/docker/example/build/Dockerfile
+++ utils/docker/example/build/Dockerfile
@@ -18,9 +18,11 @@
 # FIXME: Install llvm/clang build dependencies. Including compiler to
 # build stage1, cmake, subversion, ninja, etc.
 
-# Arguments to pass to build_install_clang.sh.
+ADD checksums /tmp/checksums
+ADD scripts /tmp/scripts
+
+# Arguments passed to build_install_clang.sh.
 ARG buildscript_args
 
 # Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-ADD scripts/build_install_llvm.sh /tmp
-RUN /tmp/build_install_llvm.sh ${buildscript_args}
+RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
Index: utils/docker/nvidia-cuda/build/Dockerfile
===================================================================
--- utils/docker/nvidia-cuda/build/Dockerfile
+++ utils/docker/nvidia-cuda/build/Dockerfile
@@ -17,10 +17,15 @@
 
 # Install llvm build dependencies.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends ca-certificates cmake python2.7 \
+    apt-get install -y --no-install-recommends ca-certificates cmake python \
         subversion ninja-build && \
     rm -rf /var/lib/apt/lists/*
 
+ADD checksums /tmp/checksums
+ADD scripts /tmp/scripts
+
+# Arguments passed to build_install_clang.sh.
+ARG buildscript_args
+
 # Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-ADD scripts/build_install_llvm.sh /tmp
-RUN /tmp/build_install_llvm.sh ${buildscript_args}
+RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
Index: utils/docker/scripts/build_install_llvm.sh
===================================================================
--- utils/docker/scripts/build_install_llvm.sh
+++ utils/docker/scripts/build_install_llvm.sh
@@ -181,6 +181,16 @@
     "$CLANG_BUILD_DIR/src/clang/tools/extra"
 fi
 
+CHECKSUMS_FILE="/tmp/checksums/checksums.txt"
+
+if [ -f "$CHECKSUMS_FILE" ]; then
+  echo "Validating checksums for LLVM checkout..."
+  python "$(dirname "$0")/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \
+    --partial --multi_dir "$CLANG_BUILD_DIR/src"
+else
+  echo "Skipping checksum validation..."
+fi
+
 mkdir "$CLANG_BUILD_DIR/build"
 pushd "$CLANG_BUILD_DIR/build"
 
Index: utils/docker/scripts/llvm_checksum/llvm_checksum.py
===================================================================
--- /dev/null
+++ utils/docker/scripts/llvm_checksum/llvm_checksum.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+""" A small program to compute checksums of an LLVM checkout.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import sys
+from argparse import ArgumentParser
+from llvm_checksum_utils import *
+
+
+def main():
+  parser = ArgumentParser()
+  parser.add_argument(
+      "-v", "--verbose", action="store_true", help="enable debug logging")
+  parser.add_argument("-c", "--check", metavar="reference_file",
+                      help="read checksums from reference_file and " +
+                      "check that they match checksums of llvm_path.")
+  parser.add_argument("--partial", action="store_true",
+                      help="ignore projects from reference_file " +
+                      "that are not checked out in llvm_path.")
+  parser.add_argument("--multi_dir", action="store_true",
+                      help="indicates llvm_path contains llvm, checked out " +
+                      "into multiple directories, as opposed to a " +
+                      "typical single source tree checkout.")
+  parser.add_argument("llvm_path")
+
+  args = parser.parse_args()
+  if args.check is not None:
+    with open(args.check, "r") as f:
+      reference_checksums = ReadLLVMChecksums(f)
+  else:
+    reference_checksums = None
+
+  if args.verbose:
+    logging.basicConfig(level=logging.DEBUG)
+
+  llvm_projects = CreateLLVMProjects(not args.multi_dir)
+  checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects)
+
+  if reference_checksums is None:
+    WriteLLVMChecksums(checksums, sys.stdout)
+    sys.exit(0)
+
+  if not ValidateChecksums(reference_checksums, checksums, args.partial):
+    sys.stdout.write("Checksums differ.\nNew checksums:\n")
+    WriteLLVMChecksums(checksums, sys.stdout)
+    sys.stdout.write("Reference checksums:\n")
+    WriteLLVMChecksums(reference_checksums, sys.stdout)
+    sys.exit(1)
+  else:
+    sys.stdout.write("Checksums match.\n")
+
+
+if __name__ == "__main__":
+  main()
Index: utils/docker/scripts/llvm_checksum/llvm_checksum_utils.py
===================================================================
--- /dev/null
+++ utils/docker/scripts/llvm_checksum/llvm_checksum_utils.py
@@ -0,0 +1,304 @@
+"""Contains helper functions to compute checksums for LLVM checkouts.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import logging
+import os
+import os.path
+import re
+import sys
+
+SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
+
+
+class FileKind(object):
+  """ Kind of file in results of checksum_recursively.
+  """
+  FILE = 0
+  VALID_SYMLINK = 1
+  BROKEN_SYMLINK = 2
+
+
+def checksum_recursively(path,
+                         is_ignored,
+                         content_hasher,
+                         hash_algo=hashlib.sha256):
+  """ Computes checksums for all files and symlinks under path.
+
+  Results also include files and symlinks in all recursive subdirectories.
+
+  Args:
+    path: a directory for computing the checksum.
+    is_ignored: a function indicating whether parts of the directory tree
+      should be ignored during checksumming.
+    content_hasher: a function to compute checksums of the file and symlink
+      contents.
+    hash_algo: a function that creates a hasher object with a preferred hashing
+      algorithm. An 'update' method will be called on the created object to
+      populate it with the contents to hash.
+
+  Returns:
+    A list of tuples of the form (kind, path, checksum). The `kind` field has
+    type FileKind.
+  """
+
+  def process_file(fullpath):
+    if os.path.islink(fullpath):
+      # Compute checksum of symlink's contents.
+      hasher = hash_algo()
+      symlink_exists = os.path.exists(fullpath)
+      if symlink_exists:
+        # Use contents of the symlink for valid symlinks.
+        content_hasher(fullpath, hasher)
+      else:
+        # Use target for broken symlinks.
+        target = os.readlink(fullpath)
+        hasher.update(target)
+      symlink_checksum = hasher.hexdigest()
+      logging.debug("Checksum %s for %s symlink %s", symlink_checksum, "valid"
+                    if symlink_exists else "broken", fullpath)
+      return (FileKind.VALID_SYMLINK if symlink_exists else
+              FileKind.BROKEN_SYMLINK, fullpath, symlink_checksum)
+    else:
+      # Compute checksum of the file.
+      hasher = hash_algo()
+      content_hasher(fullpath, hasher)
+      file_checksum = hasher.hexdigest()
+      logging.debug("Checksum %s for file %s", file_checksum, fullpath)
+      # Remember the checksum
+      return (FileKind.FILE, fullpath, file_checksum)
+
+  def raise_error(err):
+    raise err
+
+  # Compute checksums for all files inside dir_path.
+  def process_dir(dir_path):
+    checksums = list()
+    for root, dirs, files in os.walk(dir_path, onerror=raise_error):
+      # Don't recurse into ignored subdirectories.
+      dirs[:] = [d for d in dirs if not is_ignored(os.path.join(root, d))]
+      # Process files.
+      for f in files:
+        fullpath = os.path.join(root, f)
+        if is_ignored(fullpath):
+          continue
+        checksums.append(process_file(fullpath))
+    return checksums
+
+  results = process_dir(path)
+  # Sort results by path.
+  results.sort(key=lambda x: x[1])
+  return results
+
+
+def dir_checksum(path, is_ignored, content_hasher, hash_algo=hashlib.sha256):
+  """ Computes a checksum of the directory.
+
+  Calls checksum_recursively and combines results into a single checksum.
+
+  Args:
+    path: a directory for computing the checksum.
+    is_ignored: a function to check whether the path should be ignored when
+      calculating the hash code.
+    content_hasher: a function to compute checksums of the file and symlink
+      contents.
+    hash_algo: a function that creates a hasher object with the preferred
+      hashing algorithm. An 'update' method will be called on the created
+      object to populate it with the contents to hash.
+
+  Returns:
+    Result of calling hasher.hexdigest(), where hasher was created by calling
+    hash_algo().
+ """ + + # Computes checksums for files and symlinks under path. + file_checksums = checksum_recursively(path, is_ignored, content_hasher, + hash_algo) + hasher = hash_algo() + for kind, file_path, checksum in file_checksums: + # Feed path of the file to hasher. + relpath = os.path.relpath(file_path, path) + hasher.update(relpath) + # Feed a kind of the file (symlink, file, broken symlink) to the hasher + if kind == FileKind.FILE: + hasher.update("@file") + elif kind == FileKind.VALID_SYMLINK: + hasher.update("@symlink") + else: + assert kind == FileKind.BROKEN_SYMLINK + hasher.update("@broken_symlink") + # Feed checksum to hasher. + hasher.update(checksum) + return hasher.hexdigest() + + +class LLVMProject(object): + """An LLVM project with a descriptive name and a relative checkout path. + """ + + def __init__(self, name, relpath): + self.name = name + self.relpath = relpath + + +def CreateLLVMProjects(single_tree_checkout): + """Returns a list of LLVMProject instances, describing relative paths of a typical LLVM checkout. + + Args: + single_tree_checkout: + When True, relative paths for each project points to a typical single + source tree checkout. + When False, relative paths for each projects points to a separate + directory. However, clang-tools-extra is an exception, its relative path + will always be 'clang/tools/extra'. + """ + # FIXME: cover all of llvm projects. + + # Projects that reside inside 'projects/' in a single source tree checkout. + ORDINARY_PROJECTS = [ + "compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind", + "parallel-libs", "test-suite" + ] + # Projects that reside inside 'tools/' in a single source tree checkout. + TOOLS_PROJECTS = ["clang", "lld", "lldb", "llgo"] + + if single_tree_checkout: + projects = [LLVMProject("llvm", "")] + projects += [ + LLVMProject(p, os.path.join("projects", p)) for p in ORDINARY_PROJECTS + ] + projects += [ + LLVMProject(p, os.path.join("tools", p)) for p in TOOLS_PROJECTS + ] + projects.append( + LLVMProject("clang-tools-extra", + os.path.join("tools", "clang", "tools", "extra"))) + else: + projects = [LLVMProject("llvm", "llvm")] + projects += [LLVMProject(p, p) for p in ORDINARY_PROJECTS] + projects += [LLVMProject(p, p) for p in TOOLS_PROJECTS] + projects.append( + LLVMProject("clang-tools-extra", os.path.join("clang", "tools", + "extra"))) + return projects + + +def ComputeLLVMChecksums(root_path, projects): + """Compute checksums for LLVM sources checked out using svn. + + Args: + root_path: a directory of llvm checkout. + projects: a list of LLVMProject instances, which describe checkout paths, + relative to root_path. + + Returns: + A dict mapping from project name to project checksum. + """ + project_dirs = set([os.path.join(root_path, p.relpath) for p in projects]) + + def is_ignored(path): + # Don't recurse into llvm subprojects. + if path in project_dirs: + return True + # Don't recurse into .svn and .git subdirectories. + dirname, basename = os.path.split(path) + if basename == ".svn" or basename == ".git": + return True + return False + + def replace_svn_substitutions(contents): + # Replace svn substitutions for $Date$ and $LastChangedDate$. + # Unfortunately, these are locale-specific for local machine. 
+    return SVN_DATES_REGEX.sub(r"$\1$", contents)
+
+  def hash_ignoring_substitutions(file_path, hasher):
+    with open(file_path, "rb") as f:
+      contents = f.read()
+    new_contents = replace_svn_substitutions(contents)
+    if contents != new_contents:
+      logging.debug("Replaced svn keyword substitutions in %s", file_path)
+      logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
+    hasher.update(new_contents)
+
+  project_checksums = dict()
+  # Hash each project using dir_checksum.
+  for proj in projects:
+    fullpath = os.path.join(root_path, proj.relpath)
+    if os.path.exists(fullpath):
+      logging.info("Computing checksum for %s", proj.name)
+      checksum = dir_checksum(
+          fullpath, is_ignored, content_hasher=hash_ignoring_substitutions)
+      project_checksums[proj.name] = checksum
+    else:
+      logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath,
+                   proj.name)
+  return project_checksums
+
+
+def WriteLLVMChecksums(checksums, f):
+  """Writes checksums to a text file.
+
+  Args:
+    checksums: a dict mapping from project name to project checksum (result of
+      ComputeLLVMChecksums).
+    f: a file object to write into.
+  """
+
+  for proj in sorted(checksums.keys()):
+    f.write("{} {}\n".format(checksums[proj], proj))
+
+
+def ReadLLVMChecksums(f):
+  """Reads checksums from a text file, produced by WriteLLVMChecksums.
+
+  Returns:
+    A dict mapping from project name to project checksum.
+  """
+  checksums = {}
+  while True:
+    line = f.readline()
+    if line == "":
+      break
+    checksum, proj = line.split()
+    checksums[proj] = checksum
+  return checksums
+
+
+def ValidateChecksums(reference_checksums,
+                      new_checksums,
+                      allow_missing_projects=False):
+  """Validates that reference_checksums and new_checksums match.
+
+  Args:
+    reference_checksums: a dict of reference checksums, mapping from a project
+      name to a project checksum.
+    new_checksums: a dict of checksums to be checked, mapping from a project
+      name to a project checksum.
+    allow_missing_projects:
+      When True, reference_checksums may contain more projects than
+      new_checksums. Projects missing from new_checksums are ignored.
+      When False, new_checksums and reference_checksums must contain checksums
+      for the same set of projects. If there is a project in
+      reference_checksums that is missing from new_checksums,
+      ValidateChecksums will return False.
+
+  Returns:
+    True if the checksums match, taking the allow_missing_projects flag into
+    account; False otherwise.
+  """
+  if not allow_missing_projects:
+    if len(new_checksums) != len(reference_checksums):
+      return False
+
+  for proj, checksum in new_checksums.iteritems():
+    # We never computed a checksum for this project.
+    if proj not in reference_checksums:
+      return False
+    # Checksum did not match.
+    if reference_checksums[proj] != checksum:
+      return False
+
+  return True
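Example usage of the new checksum script (an illustrative sketch, not taken from the patch itself; the checkout path ~/llvm and the file name checksums.txt are placeholders):

  # Compute reference checksums of a single-tree LLVM checkout and store them.
  python utils/docker/scripts/llvm_checksum/llvm_checksum.py ~/llvm > checksums.txt

  # Verify that a checkout matches the stored checksums. --partial ignores
  # projects listed in checksums.txt that are not present in the checkout.
  python utils/docker/scripts/llvm_checksum/llvm_checksum.py \
    -c checksums.txt --partial ~/llvm

With -c, the script prints "Checksums match." and exits with status 0 on success; on a mismatch it prints both sets of checksums and exits with status 1.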
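A checksums file produced this way can then be passed to build_docker_image.sh through the new -c/--checksums option (again illustrative; the --source, --docker-repository and --install-target values below are placeholders, and their exact meaning is given by the script's usage text):

  utils/docker/build_docker_image.sh \
    --source debian8 \
    --docker-repository my-clang-image \
    --install-target install-clang \
    --checksums checksums.txt

The file is copied into the build context as checksums/checksums.txt, and inside the container build_install_llvm.sh runs llvm_checksum.py -c /tmp/checksums/checksums.txt --partial --multi_dir against the checked-out sources, so the docker build fails if the checkout does not match the reference checksums.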