Index: cmake/modules/AddLLVM.cmake
===================================================================
--- cmake/modules/AddLLVM.cmake
+++ cmake/modules/AddLLVM.cmake
@@ -328,11 +328,13 @@
 #      May specify header files for IDE generators.
 #   SONAME
 #      Should set SONAME link flags and create symlinks
+#   PLUGIN_TOOL
+#      The tool (i.e. cmake target) that this plugin will link against
 #   )
 function(llvm_add_library name)
   cmake_parse_arguments(ARG
     "MODULE;SHARED;STATIC;OBJECT;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME"
-    "OUTPUT_NAME"
+    "OUTPUT_NAME;PLUGIN_TOOL"
     "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS"
     ${ARGN})
   list(APPEND LLVM_COMMON_DEPENDS ${ARG_DEPENDS})
@@ -350,11 +352,15 @@
     if(ARG_SHARED OR ARG_STATIC)
       message(WARNING "MODULE with SHARED|STATIC doesn't make sense.")
     endif()
-    if(NOT LLVM_ENABLE_PLUGINS)
+    # Plugins that link against a tool are allowed even when plugins in general are not
+    if(NOT LLVM_ENABLE_PLUGINS AND NOT (ARG_PLUGIN_TOOL AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS))
       message(STATUS "${name} ignored -- Loadable modules not supported on this platform.")
       return()
     endif()
   else()
+    if(ARG_PLUGIN_TOOL)
+      message(WARNING "PLUGIN_TOOL without MODULE doesn't make sense.")
+    endif()
     if(BUILD_SHARED_LIBS AND NOT ARG_STATIC)
       set(ARG_SHARED TRUE)
     endif()
@@ -468,7 +474,10 @@
     endif()
   endif()
 
-  if (DEFINED LLVM_LINK_COMPONENTS OR DEFINED ARG_LINK_COMPONENTS)
+  if(ARG_MODULE AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS AND ARG_PLUGIN_TOOL AND (WIN32 OR CYGWIN))
+    # On DLL platforms symbols are imported from the tool by linking against it.
+    set(llvm_libs ${ARG_PLUGIN_TOOL})
+  elseif (DEFINED LLVM_LINK_COMPONENTS OR DEFINED ARG_LINK_COMPONENTS)
     if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB)
       set(llvm_libs LLVM)
     else()
@@ -673,7 +682,71 @@
 endmacro(add_llvm_executable name)
 
 function(export_executable_symbols target)
-  if (NOT MSVC) # MSVC's linker doesn't support exporting all symbols.
+  if (LLVM_EXPORTED_SYMBOL_FILE)
+    # The symbol file should contain the symbols we want the executable to
+    # export
+    set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1)
+  elseif (LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
+    # Extract the symbols to export from the static libraries that the
+    # executable links against.
+    set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1)
+    set(exported_symbol_file ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${target}.symbols)
+    # We need to consider not just the direct link dependencies, but also the
+    # transitive link dependencies. Do this by starting with the set of direct
+    # dependencies, then the dependencies of those dependencies, and so on.
+    get_target_property(new_libs ${target} LINK_LIBRARIES)
+    set(link_libs ${new_libs})
+    while(NOT "${new_libs}" STREQUAL "")
+      foreach(lib ${new_libs})
+        # Entries that are not cmake targets (plain library names, linker flags,
+        # or the -NOTFOUND placeholder) have no properties to query, so skip them.
+        if(TARGET ${lib})
+          get_target_property(lib_type ${lib} TYPE)
+          if("${lib_type}" STREQUAL "STATIC_LIBRARY")
+            list(APPEND static_libs ${lib})
+          else()
+            list(APPEND other_libs ${lib})
+          endif()
+          get_target_property(transitive_libs ${lib} INTERFACE_LINK_LIBRARIES)
+          foreach(transitive_lib ${transitive_libs})
+            list(FIND link_libs ${transitive_lib} idx)
+            if(TARGET ${transitive_lib} AND idx EQUAL -1)
+              list(APPEND newer_libs ${transitive_lib})
+              list(APPEND link_libs ${transitive_lib})
+            endif()
+          endforeach(transitive_lib)
+        endif()
+      endforeach(lib)
+      set(new_libs ${newer_libs})
+      set(newer_libs "")
+    endwhile()
+    if (MSVC)
+      set(mangling microsoft)
+    else()
+      set(mangling itanium)
+    endif()
+    add_custom_command(OUTPUT ${exported_symbol_file}
+                       COMMAND ${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py --mangling=${mangling} ${static_libs} > ${exported_symbol_file}
+                       WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
+                       DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py ${static_libs}
+                       VERBATIM
+                       COMMENT "Generating export list for ${target}")
+    add_llvm_symbol_exports( ${target} ${exported_symbol_file} )
+    # If something links against this executable then we want a
+    # transitive link against only the libraries whose symbols
+    # we aren't exporting.
+    set_target_properties(${target} PROPERTIES INTERFACE_LINK_LIBRARIES "${other_libs}")
+    # The default import library suffix that cmake uses for cygwin/mingw is
+    # ".dll.a", but for clang.exe that causes a collision with libclang.dll,
+    # where the import libraries of both get named libclang.dll.a. Use a suffix
+    # of ".exe.a" to avoid this.
+    if(CYGWIN OR MINGW)
+      set_target_properties(${target} PROPERTIES IMPORT_SUFFIX ".exe.a")
+    endif()
+  elseif(NOT (WIN32 OR CYGWIN))
+    # On Windows auto-exporting everything doesn't work because of the limit on
+    # the size of the exported symbol table, but on other platforms we can do
+    # it without any trouble.
     set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1)
     if (APPLE)
       set_property(TARGET ${target} APPEND_STRING PROPERTY
Index: cmake/modules/HandleLLVMOptions.cmake
===================================================================
--- cmake/modules/HandleLLVMOptions.cmake
+++ cmake/modules/HandleLLVMOptions.cmake
@@ -595,15 +595,6 @@
   endif()
 endif()
 
-if(CYGWIN OR MINGW)
-  # Prune --out-implib from executables. It doesn't make sense even
-  # with --export-all-symbols.
-  string(REGEX REPLACE "-Wl,--out-implib,[^ ]+ " " "
-    CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE}")
-  string(REGEX REPLACE "-Wl,--out-implib,[^ ]+ " " "
-    CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE}")
-endif()
-
 if(MSVC)
   # Remove flags here, for exceptions and RTTI.
   # Each target property or source property should be responsible to control
@@ -641,6 +632,19 @@
     CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
 endif()
 
+# This option makes utils/extract_symbols.py be used to determine the list of
+# symbols to export from LLVM tools. This is necessary when using MSVC if you
+# want to allow plugins, though note that the plugin has to explicitly link
+# against (exactly one) tool so we can't unilaterally turn on
+# LLVM_ENABLE_PLUGINS when it's enabled.
+option(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS "Export symbols from LLVM tools so that plugins can import them" OFF)
+if(BUILD_SHARED_LIBS AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
+  message(FATAL_ERROR "BUILD_SHARED_LIBS not compatible with LLVM_EXPORT_SYMBOLS_FOR_PLUGINS")
+endif()
+if(LLVM_LINK_LLVM_DYLIB AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
+  message(FATAL_ERROR "LLVM_LINK_LLVM_DYLIB not compatible with LLVM_EXPORT_SYMBOLS_FOR_PLUGINS")
+endif()
+
 # Plugin support
 # FIXME: Make this configurable.
 if(WIN32 OR CYGWIN)
Index: cmake/modules/LLVMConfig.cmake.in
===================================================================
--- cmake/modules/LLVMConfig.cmake.in
+++ cmake/modules/LLVMConfig.cmake.in
@@ -40,6 +40,7 @@
 set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
 
 set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@)
+set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@)
 set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@)
 
 set(LLVM_ON_UNIX @LLVM_ON_UNIX@)
Index: lib/Transforms/Hello/CMakeLists.txt
===================================================================
--- lib/Transforms/Hello/CMakeLists.txt
+++ lib/Transforms/Hello/CMakeLists.txt
@@ -15,4 +15,6 @@
 
   DEPENDS
   intrinsics_gen
+  PLUGIN_TOOL
+  opt
   )
Index: utils/extract_symbols.py
===================================================================
--- /dev/null
+++ utils/extract_symbols.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+
+"""A tool for extracting a list of symbols to export
+
+When exporting symbols from a dll or exe we either need to mark the symbols in
+the source code as __declspec(dllexport) or supply a list of symbols to the
+linker. This program automates the latter by inspecting the symbol tables of a
+list of link inputs and deciding which of those symbols need to be exported.
+
+We can't just export all the defined symbols, as there's a limit of 65535
+exported symbols and in clang we go way over that, particularly in a debug
+build. Therefore a large part of the work is pruning symbols either which can't
+be imported, or which we think are things that have definitions in public header
+files (i.e. template instantiations) and we would get defined in the thing
+importing these symbols anyway.
+"""
+
+from __future__ import print_function
+import sys
+import re
+import os
+import subprocess
+import multiprocessing
+import argparse
+
+def dumpbin_get_symbols(lib):
+    """Yield the defined external symbols of lib as listed by dumpbin."""
+    # universal_newlines makes stdout yield text (not bytes) on Python 3
+    process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
+                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
+                               universal_newlines=True)
+    process.stdin.close()
+    for line in process.stdout:
+        # Look for external symbols that are defined in some section
+        match = re.match(r"^.+SECT.+External\s+\|\s+(\S+).*$", line)
+        if match:
+            yield match.group(1)
+    process.wait()
+
+def nm_get_symbols(lib):
+    """Yield the defined external symbols of lib as listed by nm."""
+    process = subprocess.Popen(['nm',lib], bufsize=1,
+                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
+                               universal_newlines=True)
+    process.stdin.close()
+    for line in process.stdout:
+        # Look for external symbols that are defined in some section
+        match = re.match(r"^\S+\s+[BDGRSTVW]\s+(\S+)$", line)
+        if match:
+            yield match.group(1)
+    process.wait()
+
+def readobj_get_symbols(lib):
+    """Yield the defined external symbols of lib as listed by llvm-readobj."""
+    process = subprocess.Popen(['llvm-readobj','-symbols',lib], bufsize=1,
+                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
+                               universal_newlines=True)
+    process.stdin.close()
+    for line in process.stdout:
+        # When looking through the output of llvm-readobj we expect to see Name,
+        # Section, then StorageClass, so record Name and Section when we see
+        # them and decide if this is a defined external symbol when we see
+        # StorageClass.
+        match = re.search(r'Name: (\S+)', line)
+        if match:
+            name = match.group(1)
+        match = re.search(r'Section: (\S+)', line)
+        if match:
+            section = match.group(1)
+        match = re.search(r'StorageClass: (\S+)', line)
+        if match:
+            storageclass = match.group(1)
+            if section != 'IMAGE_SYM_ABSOLUTE' and \
+               section != 'IMAGE_SYM_UNDEFINED' and \
+               storageclass == 'External':
+                yield name
+    process.wait()
+
+# MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
+# identifier/type mangling we can decide which symbols could possibly be
+# required and which we can discard.
+def should_keep_microsoft_symbol(symbol):
+    """Return symbol if it should be exported, otherwise None."""
+    # Keep unmangled (i.e. extern "C") names
+    if not symbol.startswith('?'):
+        return symbol
+    # Function template instantiations start with ?$, discard them as it's
+    # assumed that the definition is public
+    elif symbol.startswith('??$'):
+        return None
+    # Deleting destructors start with ?_G or ?_E and can be discarded because
+    # link.exe gives you a warning telling you they can't be exported if you
+    # don't
+    elif symbol.startswith('??_G') or symbol.startswith('??_E'):
+        return None
+    # Constructors (?0) and destructors (?1) of templates (?$) are assumed to be
+    # defined in headers and not required to be kept
+    elif symbol.startswith('??0?$') or symbol.startswith('??1?$'):
+        return None
+    # An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol
+    # that mentions an anonymous namespace can be discarded, as the anonymous
+    # namespace doesn't exist outside of that translation unit.
+    elif re.search(r'\?A(0x\w+)?@', symbol):
+        return None
+    # Keep mangled llvm:: and clang:: function symbols. How we detect these is a
+    # bit of a mess and imprecise, but that avoids having to completely demangle
+    # the symbol name. The outermost namespace is at the end of the identifier
+    # mangling, and the identifier mangling is followed by the type mangling, so
+    # we look for (llvm|clang)@@ followed by something that looks like a
+    # function type mangling. To spot a function type we use (this is derived
+    # from clang/lib/AST/MicrosoftMangle.cpp):
+    # <function-type> ::= <function-class> <this-cvr-qualifiers>
+    #                     <calling-convention> <return-type>
+    #                     <argument-list> <throw-spec>
+    # <function-class> ::= [A-Z]
+    # <this-cvr-qualifiers> ::= [A-Z0-9_]*
+    # <calling-convention> ::= [A-JQ]
+    # <return-type> ::= .+
+    # <argument-list> ::= X   (void)
+    #                 ::= .+@ (list of types)
+    #                 ::= .*Z (list of types, varargs)
+    # <throw-spec> ::= exceptions are not allowed
+    elif re.search(r'(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$', symbol):
+        return symbol
+    return None
+
+# Itanium manglings are of the form _Z<identifier_mangling><type_mangling>. For
+# some reason we end up with far fewer symbols compared to when using MSVC, so
+# we need to do very little discarding of symbols.
+def should_keep_itanium_symbol(symbol):
+    """Return the symbol to export (possibly de-prefixed), otherwise None."""
+    # Cygwin gcc appears to prepend an extra _, start by removing it
+    if symbol.startswith('_') and not symbol.startswith('_Z'):
+        symbol = symbol[1:]
+    # Keep unmangled names
+    if not symbol.startswith('_') and not symbol.startswith('.'):
+        return symbol
+    # Keep llvm:: and clang:: names
+    if re.match(r'_Z\D*(4llvm|5clang)', symbol):
+        return symbol
+    return None
+
+def extract_symbols(arg):
+    """Count kept symbols for arg = (get_symbols, should_keep_symbol, lib)."""
+    get_symbols, should_keep_symbol, lib = arg
+    symbols = dict()
+    for symbol in get_symbols(lib):
+        symbol = should_keep_symbol(symbol)
+        if symbol:
+            symbols[symbol] = 1 + symbols.setdefault(symbol,0)
+    return symbols
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Extract symbols to export from libraries')
+    parser.add_argument('--mangling', choices=["itanium","microsoft"],
+                        required=True, help='expected symbol mangling scheme')
+    parser.add_argument('--tool', choices=['dumpbin','nm','llvm-readobj'],
+                        help='tool to use to extract symbols')
+    parser.add_argument('libs', metavar="lib", type=str, nargs='+',
+                        help='libraries to extract symbols from')
+    args = parser.parse_args()
+
+    # Determine the function to use to get the list of symbols from the inputs.
+    tools = { 'dumpbin' : dumpbin_get_symbols,
+              'nm' : nm_get_symbols,
+              'llvm-readobj' : readobj_get_symbols }
+    if args.tool:
+        get_symbols = tools[args.tool]
+    else:
+        # Find a tool to use by trying each in turn until we find one that
+        # exists (subprocess.Popen will throw OSError when the program does not
+        # exist).
+        get_symbols = None
+        for exe in tools.keys():
+            try:
+                # Close std streams as we don't want any output and we don't
+                # want the process to wait for something on stdin.
+                p = subprocess.Popen([exe], stdout=subprocess.PIPE,
+                                     stderr=subprocess.PIPE,
+                                     stdin=subprocess.PIPE)
+                p.stdout.close()
+                p.stderr.close()
+                p.stdin.close()
+                p.wait()
+                get_symbols = tools[exe]
+                break
+            except OSError:
+                continue
+    if not get_symbols:
+        print("Couldn't find a program to read symbols with", file=sys.stderr)
+        sys.exit(1)
+
+    # How we determine which symbols to keep and which to discard depends on
+    # the mangling scheme
+    if args.mangling == 'microsoft':
+        should_keep_symbol = should_keep_microsoft_symbol
+    else:
+        should_keep_symbol = should_keep_itanium_symbol
+
+    # Get the list of libraries to extract symbols from
+    libs = list()
+    for lib in args.libs:
+        # When invoked by cmake the arguments are the cmake target names of the
+        # libraries, so we need to add .lib/.a to the end and maybe lib to the
+        # start to get the filename. Also allow objects.
+        suffixes = ['.lib','.a','.obj','.o']
+        if not any([lib.endswith(s) for s in suffixes]):
+            for s in suffixes:
+                if os.path.exists(lib+s):
+                    lib = lib+s
+                    break
+                if os.path.exists('lib'+lib+s):
+                    lib = 'lib'+lib+s
+                    break
+        if not any([lib.endswith(s) for s in suffixes]):
+            print("Don't know what to do with argument "+lib, file=sys.stderr)
+            sys.exit(1)
+        libs.append(lib)
+
+    # Extract symbols from libraries in parallel. This is a huge time saver when
+    # doing a debug build, as there are hundreds of thousands of symbols in each
+    # library.
+    pool = multiprocessing.Pool()
+    try:
+        # Only one argument can be passed to the mapping function, and we can't
+        # use a lambda or local function definition as that doesn't work on
+        # windows, so create a list of tuples which duplicates the arguments
+        # that are the same in all calls.
+        vals = [(get_symbols, should_keep_symbol, x) for x in libs]
+        # Do an async map then wait for the result to make sure that
+        # KeyboardInterrupt gets caught correctly (see
+        # http://bugs.python.org/issue8296)
+        result = pool.map_async(extract_symbols, vals)
+        pool.close()
+        libs_symbols = result.get(3600)
+    except KeyboardInterrupt:
+        # On Ctrl-C terminate everything and exit
+        pool.terminate()
+        pool.join()
+        sys.exit(1)
+
+    # Merge everything into a single dict
+    symbols = dict()
+    for this_lib_symbols in libs_symbols:
+        for k,v in this_lib_symbols.items():
+            symbols[k] = v + symbols.setdefault(k,0)
+
+    # Count instances of template member functions, and map the symbol name to
+    # the function. We do this under the assumption that if a template member
+    # function is instantiated many times it's probably declared in a public
+    # header file.
+    template_function_count = dict()
+    template_function_mapping = dict()
+    template_function_count[""] = 0
+    for k in symbols:
+        name = None
+        if args.mangling == 'microsoft':
+            # Template member functions start with ?<fn_name>@?$<class_name>@
+            match = re.search(r"^\?(\??\w+\@\?\$\w+)\@", k)
+            if match:
+                name = match.group(1)
+        else:
+            # Template arguments are bracketed by I/E, so look for mangled
+            # names followed by something bracketed by I/E.
+            match = re.match(r'_Z\D+(.+)', k)
+            if match:
+                # The name of the template is a sequence of <number><string>,
+                # with the value of the number giving the length of the string.
+                # Skip past this to find (potentially) the template argument.
+                def skip_names(arg):
+                    tmp = re.match(r'(\d+)(.+)', arg)
+                    if tmp:
+                        n = int(tmp.group(1))
+                        return skip_names(tmp.group(2)[n:])
+                    else:
+                        return arg
+                remainder = skip_names(match.group(1))
+                # If what's left is a template argument the template name is
+                # the thing before it.
+                if re.match(r'I\w+E', remainder):
+                    name = match.group(1)[:-len(remainder)]
+        if name:
+            old_count = template_function_count.setdefault(name,0)
+            template_function_count[name] = old_count + 1
+            template_function_mapping[k] = name
+        else:
+            template_function_mapping[k] = ""
+
+    # Print symbols which both:
+    #  * Appear in exactly one input, as symbols defined in multiple
+    #    objects/libraries are assumed to have public definitions.
+    #  * Aren't instances of template member functions which have been
+    #    instantiated 100 times or more, which are assumed to have public
+    #    definitions. (100 is an arbitrary guess here.)
+    for k,v in symbols.items():
+        template_count = template_function_count[template_function_mapping[k]]
+        if v == 1 and template_count < 100:
+            print(k)