Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -31,6 +31,7 @@
 add_subdirectory(llvm-readobj)
 add_subdirectory(llvm-rtdyld)
 add_subdirectory(llvm-dwarfdump)
+add_subdirectory(llvm-strings)
 if( LLVM_USE_INTEL_JITEVENTS )
   add_subdirectory(llvm-jitlistener)
 endif( LLVM_USE_INTEL_JITEVENTS )
Index: Makefile
===================================================================
--- Makefile
+++ Makefile
@@ -34,7 +34,7 @@
                  bugpoint llvm-bcanalyzer \
                  llvm-diff macho-dump llvm-objdump llvm-readobj \
                  llvm-rtdyld llvm-dwarfdump llvm-cov \
-                 llvm-size llvm-stress llvm-mcmarkup \
+                 llvm-size llvm-stress llvm-strings llvm-mcmarkup \
                  llvm-symbolizer
 
 # If Intel JIT Events support is configured, build an extra tool to test it.
Index: llvm-strings/CMakeLists.txt
===================================================================
--- llvm-strings/CMakeLists.txt
+++ llvm-strings/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS object support)
+
+add_llvm_tool(llvm-strings
+  llvm-strings.cpp
+  )
Index: llvm-strings/LLVMBuild.txt
===================================================================
--- llvm-strings/LLVMBuild.txt
+++ llvm-strings/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-strings/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-strings
+parent = Tools
+required_libraries = Object Support
Index: llvm-strings/Makefile
===================================================================
--- llvm-strings/Makefile
+++ llvm-strings/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-strings/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-strings
+LINK_COMPONENTS := Object Support
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
Index: llvm-strings/llvm-strings.cpp
===================================================================
--- llvm-strings/llvm-strings.cpp
+++ llvm-strings/llvm-strings.cpp
@@ -0,0 +1,336 @@
+//===-- llvm-strings.cpp - find the printable strings in a file ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a utility that works like traditional Unix "strings", that
+// is, it prints out printable strings in a binary, object, or archive file.
+//
+// "llvm-strings" supports many of the features of GNU "strings", but not all.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ObjectFile.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
+
+#include <cctype>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+const char endl = '\n';
+using namespace llvm;
+
+/// Return true if \p c belongs in a printable string: alphanumerics,
+/// punctuation, whitespace that is not a control character, tab, and any
+/// other printable ASCII character.
+bool isPrintable(char c) {
+  // The <cctype> classifiers are undefined for negative arguments other than
+  // EOF, so widen through unsigned char before classifying bytes >= 0x80.
+  const unsigned char uc = static_cast<unsigned char>(c);
+  return isalnum(uc) || ispunct(uc) ||
+         (isspace(uc) && (!iscntrl(uc) || uc == '\t')) ||
+         (isascii(uc) && isprint(uc));
+  // Easy to replace this with a table at some point
+}
+
+/// Scan \p theData for printable strings.
+///
+/// A string is a maximal run of printable characters, ended either by a
+/// non-printable character or by the end of the data. Each run of at least
+/// \p sizeThreshold characters is handed to \p pf together with its offset
+/// (\p initialOffset plus its position inside \p theData).
+/// If the callback fails, quit immediately and return its error.
+/// \p verbose is currently unused.
+template <typename PrintFunction>
+error_code findStringsInBlob(StringRef theData, std::size_t initialOffset,
+                             std::size_t sizeThreshold, bool verbose,
+                             PrintFunction pf) {
+  error_code RetVal;
+  const char *const begin = theData.begin();
+  const char *const end = theData.end();
+  const char *curr = NULL; // start of the run being collected, if any
+
+  for (const char *p = begin; p != end; ++p) {
+    if (isPrintable(*p)) {
+      if (curr == NULL)
+        curr = p; // start new string
+    } else if (curr != NULL) { // not printable, and we have a string
+      const std::size_t sz = p - curr;
+      if (sz >= sizeThreshold)
+        if ((RetVal = pf(initialOffset + (curr - begin), StringRef(curr, sz))))
+          return RetVal;
+      curr = NULL; // start a new string
+    }
+  }
+
+  // A run that reaches the end of the data has no terminating byte to
+  // trigger the report above, so flush it here.
+  if (curr != NULL) {
+    const std::size_t sz = end - curr;
+    if (sz >= sizeThreshold)
+      RetVal = pf(initialOffset + (curr - begin), StringRef(curr, sz));
+  }
+
+  return RetVal;
+}
+
+/// Scan the sections of \p obj for printable strings.
+///
+/// Text sections are skipped unless \p dumpAllSections is set. Offsets passed
+/// to \p pf are \p initialOffset plus the section's address plus the position
+/// inside the section. Stops at the first error from section iteration or
+/// from \p pf and returns it; a section whose contents cannot be read is
+/// reported on stderr and skipped.
+template <typename PrintFunction>
+error_code findStringsInObjectFile(const object::ObjectFile *obj,
+                                   std::size_t initialOffset,
+                                   std::size_t sizeThreshold,
+                                   bool dumpAllSections, bool verbose,
+                                   PrintFunction pf) {
+  if (verbose)
+    outs() << "Object type: " << obj->getFileFormatName() << endl;
+
+  error_code RetVal;
+  for (object::section_iterator Iter = obj->begin_sections(),
+                                End = obj->end_sections();
+       !RetVal && Iter != End; Iter.increment(RetVal)) {
+    bool isText = false; // a failed query leaves the section as non-text
+    Iter->isText(isText);
+    if (!dumpAllSections && isText)
+      continue; // we're not dumping it
+
+    StringRef section_name;
+    Iter->getName(section_name); // only used for messages
+    if (verbose) {
+      outs() << "Object section: '" << section_name << "'";
+      if (isText)
+        outs() << " (text)";
+      outs() << endl;
+    }
+
+    // Initialised so that a failed query cannot leak an indeterminate value
+    // into the reported offsets.
+    uint64_t start_of_section = 0;
+    if (Iter->getAddress(start_of_section))
+      errs() << "## Error getting start address of section '" << section_name
+             << "'" << endl;
+
+    StringRef contents;
+    if (Iter->getContents(contents)) {
+      errs() << "## Error getting contents of section '" << section_name << "'"
+             << endl;
+      continue;
+    }
+
+    std::size_t offset = initialOffset + start_of_section;
+    RetVal = findStringsInBlob(contents, offset, sizeThreshold, verbose, pf);
+  }
+
+  return RetVal;
+}
+
+/// Scan the object-file members of archive \p a for printable strings.
+///
+/// If \p onlyFile is non-empty, only members with that file name are scanned
+/// and a diagnostic is printed when no member matches. Members that cannot be
+/// opened as object files are skipped. Stops at the first error returned by
+/// the object-file scan.
+template <typename PrintFunction>
+error_code findStringsInArchive(const object::Archive *a, StringRef onlyFile,
+                                std::size_t initialOffset,
+                                std::size_t sizeThreshold, bool dumpAllSections,
+                                bool verbose, PrintFunction pf) {
+  error_code RetVal;
+  bool FoundIt = onlyFile.empty();
+  for (object::Archive::child_iterator Iter = a->begin_children(),
+                                       End = a->end_children();
+       !RetVal && Iter != End; ++Iter) {
+    OwningPtr<object::Binary> child;
+    if (Iter->getAsBinary(child))
+      continue;
+    object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get());
+    if (!o)
+      continue;
+
+    if (verbose)
+      outs() << "Archive section: '" << o->getFileName() << "'" << endl;
+    if (!onlyFile.empty() && onlyFile != o->getFileName())
+      continue;
+
+    // TODO: add the member's offset within the archive.
+    std::size_t childOffset = initialOffset;
+    RetVal = findStringsInObjectFile(o, childOffset, sizeThreshold,
+                                     dumpAllSections, verbose, pf);
+    FoundIt = true;
+  }
+
+  if (!FoundIt)
+    errs() << "archive: " << a->getFileName()
+           << " does not contain a member named: " << onlyFile << endl;
+  return RetVal;
+}
+
+enum OffsetDisplayKind {
+  ODNone,
+  ODDecimal,
+  ODOctal,
+  ODHex
+};
+// Indexed by OffsetDisplayKind; keep in the same order as the enum.
+static const char *OffsetDisplayNames[] = { "None", "Decimal", "Octal", "Hex" };
+
+cl::opt<OffsetDisplayKind> OptionOffsetDisplay(
+    "t", cl::desc("offset format: d|o|x -> decimal|octal|hex"),
+    cl::values(clEnumValN(ODDecimal, "d", "Display string offsets in decimal"),
+               clEnumValN(ODOctal, "o", "Display string offsets in octal"),
+               clEnumValN(ODHex, "x", "Display string offsets in hexadecimal"),
+               clEnumValEnd),
+    cl::init(ODNone));
+
+cl::list<std::string> InputFileNames(cl::Positional,
+                                     cl::desc("<input files>"));
+cl::opt<unsigned> OptionMinStringLength("n", cl::desc("Minimum string length"),
+                                        cl::init(4));
+cl::opt<bool> OptionVerbose("v", cl::desc("print progress information"),
+                            cl::init(false));
+cl::opt<bool> OptionAllObjectSections(
+    "a", cl::desc("Look in all sections of object files"), cl::init(false));
+cl::opt<bool> OptionTreatAllFilesAsBinary(
+    "b", cl::desc("Look in all bytes of all files"), cl::init(false));
+
+/// Print one string, preceded by its offset in the radix selected with -t.
+error_code printFn(std::size_t offset, StringRef str) {
+  // std::size_t is not guaranteed to be the same width as long, so widen to
+  // unsigned long long and use the matching length modifier.
+  const unsigned long long off = offset;
+
+  // print the offset, if necessary
+  switch (OptionOffsetDisplay) {
+  case ODDecimal:
+    outs() << format("%llu\t", off);
+    break;
+  case ODOctal:
+    outs() << format("%llo\t", off);
+    break;
+  case ODHex:
+    outs() << format("%llx\t", off);
+    break;
+  default:
+    break;
+  }
+
+  // print the data
+  outs() << str << endl;
+  return error_code::success();
+}
+
+/// Find the strings in one input.
+///
+/// \p fileName is a path, "-" for standard input, or "archive(member)" to
+/// restrict an archive to a single member. The input is scanned as raw bytes
+/// unless it is recognised as an archive or object file and -b was not given.
+error_code processOne(const std::string &fileName) {
+  error_code RetVal;
+  OwningPtr<MemoryBuffer> aFile;
+  std::string main_fn = fileName; // what we open; "-" selects stdin
+  std::string sub_fn;             // archive member, from "xxx(yyy)"
+
+  // Looking for "xxx(yyy)". The emptiness check keeps rbegin() valid.
+  if (fileName != "-" && !fileName.empty() && *fileName.rbegin() == ')') {
+    std::size_t open_paren = fileName.find('(');
+    if (open_paren != std::string::npos) {
+      main_fn = fileName.substr(0, open_paren);
+      sub_fn = fileName.substr(open_paren + 1,
+                               fileName.size() - open_paren - 2);
+    }
+  }
+
+  // This is what we display to the user. It is kept apart from main_fn
+  // because getFileOrSTDIN() only reads stdin when handed exactly "-".
+  const std::string shown_fn =
+      (main_fn == "-") ? std::string("<stdin>") : main_fn;
+
+  if ((RetVal = MemoryBuffer::getFileOrSTDIN(main_fn, aFile))) {
+    errs() << "## Can't open file '" << shown_fn << "'!" << endl;
+    return RetVal;
+  }
+
+  sys::fs::file_magic file_type = sys::fs::identify_magic(aFile->getBuffer());
+  if (!file_type.is_object() || OptionTreatAllFilesAsBinary) {
+    if (OptionVerbose)
+      outs() << "Processing '" << shown_fn << "' as a binary file" << endl;
+    RetVal = findStringsInBlob(aFile->getBuffer(), 0, OptionMinStringLength,
+                               OptionVerbose, printFn);
+  } else if (file_type == sys::fs::file_magic::archive) {
+    if (OptionVerbose)
+      outs() << "Processing '" << shown_fn << "' as an archive file" << endl;
+
+    OwningPtr<object::Binary> archive;
+    if (!(RetVal = object::createBinary(aFile.take(), archive)))
+      if (object::Archive *a = dyn_cast<object::Archive>(archive.get()))
+        RetVal = findStringsInArchive(a, sub_fn, 0, OptionMinStringLength,
+                                      OptionAllObjectSections, OptionVerbose,
+                                      printFn);
+  } else { // An object file of some kind
+    if (OptionVerbose)
+      outs() << "Processing '" << shown_fn << "' as an object file" << endl;
+
+    OwningPtr<object::Binary> obj;
+    if (!(RetVal = object::createBinary(aFile.take(), obj)))
+      if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(obj.get()))
+        RetVal = findStringsInObjectFile(o, 0, OptionMinStringLength,
+                                         OptionAllObjectSections,
+                                         OptionVerbose, printFn);
+  }
+
+  return RetVal;
+}
+
+int main(int argc, const char *argv[]) {
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  cl::ParseCommandLineOptions(argc, argv);
+
+  // Display the current settings, if asked
+  if (OptionVerbose) {
+    outs() << "Minimum string length: " << OptionMinStringLength << endl;
+    outs() << "Offset display: "
+           << OffsetDisplayNames[int (OptionOffsetDisplay)] << endl;
+    if (OptionAllObjectSections)
+      outs() << "Look in all sections of object files" << endl;
+    else
+      outs() << "Do not search in TEXT sections of object files" << endl;
+  }
+
+  error_code Err;
+  if (InputFileNames.empty()) {
+    if (!(Err = sys::Program::ChangeStdinToBinary()))
+      Err = processOne("-"); // read from stdin
+  } else {
+    for (std::vector<std::string>::iterator Iter = InputFileNames.begin(),
+                                            End = InputFileNames.end();
+         !Err && Iter != End; ++Iter)
+      Err = processOne(*Iter);
+  }
+
+  // Exit status 1 if any input failed, 0 otherwise.
+  return Err ? 1 : 0;
+}