Index: llvm/trunk/include/llvm/Support/SymbolRemappingReader.h
===================================================================
--- llvm/trunk/include/llvm/Support/SymbolRemappingReader.h
+++ llvm/trunk/include/llvm/Support/SymbolRemappingReader.h
@@ -0,0 +1,133 @@
+//===- SymbolRemappingReader.h - Read symbol remapping file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions needed for reading and applying symbol
+// remapping files.
+//
+// Support is provided only for the Itanium C++ name mangling scheme for now.
+//
+// NOTE: If you are making changes to this file format, please remember
+//       to document them in the Clang documentation at
+//       tools/clang/docs/UsersManual.rst.
+//
+// File format
+// -----------
+//
+// The symbol remappings are written as an ASCII text file. Blank lines and
+// lines starting with a # are ignored. All other lines specify a kind of
+// mangled name fragment, along with two fragments of that kind that should
+// be treated as equivalent, separated by spaces.
+//
+// See http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling for a
+// description of the Itanium name mangling scheme.
+//
+// The accepted fragment kinds are:
+//
+//  * name      A <name>, such as 6foobar or St3__1
+//  * type      A <type>, such as Ss or N4llvm9StringRefE
+//  * encoding  An <encoding> (a complete mangling without the leading _Z)
+//
+// For example:
+//
+//   # Ignore int / long differences to treat symbols from 32-bit and 64-bit
+//   # builds with differing size_t / ptrdiff_t / intptr_t as equivalent.
+//   type i l
+//   type j m
+//
+//   # Ignore differences between libc++ and libstdc++, and between libstdc++'s
+//   # C++98 and C++11 ABIs.
+// name 3std St3__1 +// name 3std St7__cxx11 +// +// # Remap a function overload to a specialization of a template (including +// # any local symbols declared within it). +// encoding N2NS1fEi N2NS1fIiEEvT_ +// +// # Substitutions must be remapped separately from namespace 'std' for now. +// name Sa NSt3__19allocatorE +// name Sb NSt3__112basic_stringE +// type Ss NSt3__112basic_stringIcSt11char_traitsIcESaE +// # ... +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H +#define LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ItaniumManglingCanonicalizer.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { + +class SymbolRemappingParseError : public ErrorInfo { +public: + SymbolRemappingParseError(StringRef File, int64_t Line, Twine Message) + : File(File), Line(Line), Message(Message.str()) {} + + void log(llvm::raw_ostream &OS) const override { + OS << File << ':' << Line << ": " << Message; + } + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + StringRef getFileName() const { return File; } + int64_t getLineNum() const { return Line; } + StringRef getMessage() const { return Message; } + + static char ID; + +private: + std::string File; + int64_t Line; + std::string Message; +}; + +/// Reader for symbol remapping files. +/// +/// Remaps the symbol names in profile data to match those in the program +/// according to a set of rules specified in a given file. +class SymbolRemappingReader { +public: + /// Read remappings from the given buffer, which must live as long as + /// the remapper. + Error read(MemoryBuffer &B); + + /// A Key represents an equivalence class of symbol names. + using Key = uintptr_t; + + /// Construct a key for the given symbol, or return an existing one if an + /// equivalent name has already been inserted. 
The symbol name must live + /// as long as the remapper. + /// + /// The result will be Key() if the name cannot be remapped (typically + /// because it is not a valid mangled name). + Key insert(StringRef FunctionName) { + return Canonicalizer.canonicalize(FunctionName); + } + + /// Map the given symbol name into the key for the corresponding equivalence + /// class. + /// + /// The result will typically be Key() if no equivalent symbol has been + /// inserted, but this is not guaranteed: a Key different from all keys ever + /// returned by \c insert may be returned instead. + Key lookup(StringRef FunctionName) { + return Canonicalizer.lookup(FunctionName); + } + +private: + ItaniumManglingCanonicalizer Canonicalizer; +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H Index: llvm/trunk/lib/Support/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Support/CMakeLists.txt +++ llvm/trunk/lib/Support/CMakeLists.txt @@ -122,6 +122,7 @@ StringPool.cpp StringSaver.cpp StringRef.cpp + SymbolRemappingReader.cpp SystemUtils.cpp TarWriter.cpp TargetParser.cpp Index: llvm/trunk/lib/Support/SymbolRemappingReader.cpp =================================================================== --- llvm/trunk/lib/Support/SymbolRemappingReader.cpp +++ llvm/trunk/lib/Support/SymbolRemappingReader.cpp @@ -0,0 +1,81 @@ +//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for reading and applying symbol +// remapping files. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SymbolRemappingReader.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +char SymbolRemappingParseError::ID; + +/// Load a set of name remappings from a text file. +/// +/// See the documentation at the top of the file for an explanation of +/// the expected format. +Error SymbolRemappingReader::read(MemoryBuffer &B) { + line_iterator LineIt(B, /*SkipBlanks=*/true, '#'); + + auto ReportError = [&](Twine Msg) { + return llvm::make_error( + B.getBufferIdentifier(), LineIt.line_number(), Msg); + }; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + Line = Line.ltrim(' '); + // line_iterator only detects comments starting in column 1. + if (Line.startswith("#") || Line.empty()) + continue; + + SmallVector Parts; + Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false); + + if (Parts.size() != 3) + return ReportError("Expected 'kind mangled_name mangled_name', " + "found '" + Line + "'"); + + using FK = ItaniumManglingCanonicalizer::FragmentKind; + Optional FragmentKind = StringSwitch>(Parts[0]) + .Case("name", FK::Name) + .Case("type", FK::Type) + .Case("encoding", FK::Encoding) + .Default(None); + if (!FragmentKind) + return ReportError("Invalid kind, expected 'name', 'type', or 'encoding'," + " found '" + Parts[0] + "'"); + + using EE = ItaniumManglingCanonicalizer::EquivalenceError; + switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) { + case EE::Success: + break; + + case EE::ManglingAlreadyUsed: + return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' " + "have both been used in prior remappings. 
Move this " + "remapping earlier in the file."); + + case EE::InvalidFirstMangling: + return ReportError("Could not demangle '" + Parts[1] + "' " + "as a <" + Parts[0] + ">; invalid mangling?"); + + case EE::InvalidSecondMangling: + return ReportError("Could not demangle '" + Parts[2] + "' " + "as a <" + Parts[0] + ">; invalid mangling?"); + } + } + + return Error::success(); +} Index: llvm/trunk/unittests/Support/CMakeLists.txt =================================================================== --- llvm/trunk/unittests/Support/CMakeLists.txt +++ llvm/trunk/unittests/Support/CMakeLists.txt @@ -54,6 +54,7 @@ SpecialCaseListTest.cpp StringPool.cpp SwapByteOrderTest.cpp + SymbolRemappingReaderTest.cpp TarWriterTest.cpp TargetParserTest.cpp TaskQueueTest.cpp Index: llvm/trunk/unittests/Support/SymbolRemappingReaderTest.cpp =================================================================== --- llvm/trunk/unittests/Support/SymbolRemappingReaderTest.cpp +++ llvm/trunk/unittests/Support/SymbolRemappingReaderTest.cpp @@ -0,0 +1,96 @@ +//===- unittests/Support/SymbolRemappingReaderTest.cpp --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+class SymbolRemappingReaderTest : public testing::Test {
+public:
+  std::unique_ptr<MemoryBuffer> Buffer; // Held here so it outlives read().
+  SymbolRemappingReader Reader;
+
+  std::string readWithErrors(StringRef Text, StringRef BufferName) {
+    Buffer = MemoryBuffer::getMemBuffer(Text, BufferName);
+    Error E = Reader.read(*Buffer);
+    EXPECT_TRUE((bool)E); // The read is expected to fail.
+    return toString(std::move(E));
+  }
+
+  void read(StringRef Text, StringRef BufferName) {
+    Buffer = MemoryBuffer::getMemBuffer(Text, BufferName);
+    Error E = Reader.read(*Buffer);
+    EXPECT_FALSE((bool)E); // The read is expected to succeed.
+  }
+};
+} // unnamed namespace
+
+TEST_F(SymbolRemappingReaderTest, ParseErrors) {
+  EXPECT_EQ(readWithErrors("error", "foo.map"),
+            "foo.map:1: Expected 'kind mangled_name mangled_name', "
+            "found 'error'");
+
+  EXPECT_EQ(readWithErrors("error m1 m2", "foo.map"),
+            "foo.map:1: Invalid kind, expected 'name', 'type', or 'encoding', "
+            "found 'error'");
+}
+
+TEST_F(SymbolRemappingReaderTest, DemanglingErrors) {
+  EXPECT_EQ(readWithErrors("type i banana", "foo.map"),
+            "foo.map:1: Could not demangle 'banana' as a <type>; "
+            "invalid mangling?");
+  EXPECT_EQ(readWithErrors("name i 1X", "foo.map"),
+            "foo.map:1: Could not demangle 'i' as a <name>; "
+            "invalid mangling?");
+  EXPECT_EQ(readWithErrors("name 1X 1fv", "foo.map"),
+            "foo.map:1: Could not demangle '1fv' as a <name>; "
+            "invalid mangling?");
+  EXPECT_EQ(readWithErrors("encoding 1fv 1f1gE", "foo.map"),
+            "foo.map:1: Could not demangle '1f1gE' as a <encoding>; "
+            "invalid mangling?");
+}
+
+TEST_F(SymbolRemappingReaderTest, BadMappingOrder) {
+  StringRef Map = R"(
+    # N::foo == M::bar
+    name N1N3fooE N1M3barE
+
+    # N:: == M::
+    name 1N 1M
+  )";
+  EXPECT_EQ(readWithErrors(Map, "foo.map"),
+            "foo.map:6: Manglings '1N' and '1M' have both been used in prior "
+            "remappings. Move this remapping earlier in the file.");
+}
+
+TEST_F(SymbolRemappingReaderTest, RemappingsAdded) {
+  StringRef Map = R"(
+    # A::foo == B::bar
+    name N1A3fooE N1B3barE
+
+    # int == long
+    type i l
+
+    # void f() = void g()
+    encoding 1fIiEvv 1gIiEvv
+  )";
+
+  read(Map, "foo.map");
+  auto Key = Reader.insert("_ZN1B3bar3bazIiEEvv");
+  EXPECT_NE(Key, SymbolRemappingReader::Key());
+  EXPECT_EQ(Key, Reader.lookup("_ZN1A3foo3bazIlEEvv"));
+  EXPECT_NE(Key, Reader.lookup("_ZN1C3foo3bazIlEEvv"));
+
+  Key = Reader.insert("_Z1fIiEvv");
+  EXPECT_NE(Key, SymbolRemappingReader::Key());
+  EXPECT_EQ(Key, Reader.lookup("_Z1gIlEvv"));
+}