Index: llvm/include/llvm/Support/Anonymization.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Support/Anonymization.h
@@ -0,0 +1,167 @@
+//===-- Anonymization.h - String anonymization helper -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides helper classes to perform string anonymization, which is
+// used to hide symbol names in the IR. Because this affects the debug
+// information, the anonymizer supports generating a reversible mapping, when
+// requested, so that for example dsymutil can reverse the transformation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ANONYMIZATION_H
+#define LLVM_SUPPORT_ANONYMIZATION_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+
+/// Base class providing basic anonymization capability.
+///
+/// The base class owns the allocator backing the strings it hands out and
+/// caches each result, so anonymizeImpl() runs at most once per distinct input
+/// and mode.
+class Anonymizer {
+  using MapTy = StringMap<StringRef, BumpPtrAllocator &>;
+
+public:
+  Anonymizer() : ForwardMap(64, Alloc), IrreversibleForwardMap(32, Alloc) {}
+
+  Anonymizer(const Anonymizer &) = delete;
+  Anonymizer &operator=(const Anonymizer &) = delete;
+
+  enum { keepPrefix = true };
+
+  /// Anonymize a string.
+  ///
+  /// If \p Reverse is true, the transformation must be reversible.
+  /// If \p KeepPrefix is true, a leading 'l' or 'L' is excluded from
+  /// anonymization.
+  ///
+  /// An empty \p S is returned unchanged. Repeated calls with the same \p S
+  /// and \p Reverse return the cached result.
+  StringRef anonymize(StringRef S, bool Reverse = false,
+                      bool KeepPrefix = keepPrefix);
+
+  /// Reverse lookup a string in the map.
+  Expected<StringRef> lookup(StringRef S) const { return lookupImpl(S); }
+
+  /// Write out the reverse map in textual form.
+  ///
+  /// Derived classes may wish to use a different format.
+  void writeReverseMap(raw_ostream &OS) const { writeReverseMapImpl(OS); }
+
+  /// Read back the reverse map from textual form.
+  ///
+  /// Derived classes may wish to use a different format.
+  Error readReverseMap(MemoryBuffer *Buffer) {
+    return readReverseMapImpl(Buffer);
+  }
+
+  BumpPtrAllocator &getAllocator() { return Alloc; }
+
+  /// As StringRef::copy, but will terminate with null.
+  char *copyString(StringRef S) {
+    char *Ret = allocateString(S.size());
+    std::copy(S.begin(), S.end(), Ret);
+    return Ret;
+  }
+
+  /// Look up symbol in the forward map.
+  Expected<StringRef> lookupForwardMap(StringRef Symbol) const;
+
+protected:
+  /// Derived classes will implement this method to return the newly anonymized
+  /// string. Derived classes are free to track internal state, but are not
+  /// required to (e.g. the base class tracks the forward mappings and provides
+  /// an allocator).
+  ///
+  /// The base class will only ever call this method once for unique input,
+  /// enforcing an idempotent set of calls under sequential composition.
+  virtual StringRef anonymizeImpl(StringRef S, bool Reverse,
+                                  bool KeepPrefix) = 0;
+
+  virtual Expected<StringRef> lookupImpl(StringRef S) const = 0;
+  virtual void writeReverseMapImpl(raw_ostream &OS) const = 0;
+  virtual Error readReverseMapImpl(MemoryBuffer *Buffer) = 0;
+
+  /// For subclasses, allocate memory for a string of \p Length characters plus
+  /// a terminating null, which is already written. \p Length may be zero.
+  char *allocateString(unsigned Length) {
+    char *P = static_cast<char *>(Alloc.Allocate(Length + 1, 1));
+    P[Length] = '\0';
+    return P;
+  }
+
+  virtual ~Anonymizer() = default;
+
+private:
+  BumpPtrAllocator Alloc;
+  MapTy ForwardMap;
+  MapTy IrreversibleForwardMap;
+
+  virtual void anchor();
+};
+
+/// Stateful incrementing anonymizer.
+///
+/// Minimal stateful anonymizer, which tracks the number of anonymized symbols
+/// and appends that number to the end of a prefix. Thus, to get the same
+/// resulting anonymization, this must be fed the inputs in the same order.
+class IncrementAnonymizer : public Anonymizer {
+  using ReverseMapTy = std::vector<StringRef>;
+  using Anonymizer::Anonymizer;
+
+public:
+  /// All three strings are copied into the anonymizer's allocator, so the
+  /// arguments need not outlive the constructor call.
+  IncrementAnonymizer(StringRef Prefix = "__hidden#", StringRef Suffix = "_",
+                      StringRef IRPrefix = "__ir_hidden#")
+      : Prefix(copyString(Prefix)), Suffix(copyString(Suffix)),
+        IRPrefix(copyString(IRPrefix)) {
+    assert(!Prefix.equals(IRPrefix) &&
+           "Prefix and IRPrefix cannot be the same");
+  }
+
+protected:
+  StringRef anonymizeImpl(StringRef S, bool Reverse, bool KeepPrefix) override;
+  Expected<StringRef> lookupImpl(StringRef S) const override;
+  void writeReverseMapImpl(raw_ostream &OS) const override;
+  Error readReverseMapImpl(MemoryBuffer *Buffer) override;
+
+private:
+  StringRef Prefix;
+  StringRef Suffix;
+  StringRef IRPrefix;
+
+  unsigned Num = 0;
+  unsigned IrNum = 0;
+
+  ReverseMapTy ReverseMap;
+
+  /// Extract the number from a key.
+  Expected<size_t> findIndex(StringRef Key) const;
+
+  void anchor() override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ANONYMIZATION_H
Index: llvm/lib/Support/Anonymization.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Support/Anonymization.cpp
@@ -0,0 +1,160 @@
+//===-- Anonymization.cpp - String anonymization helper -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides helper classes to perform string anonymization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Anonymization.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <tuple>
+
+using namespace llvm;
+
+/// Build the "no such entry" error shared by the lookup paths.
+static Error makeNoEntryError(StringRef Key) {
+  return make_error<StringError>(formatv("No entry found for '{0}'", Key),
+                                 inconvertibleErrorCode());
+}
+
+StringRef Anonymizer::anonymize(StringRef S, bool Reverse, bool KeepPrefix) {
+  // Nothing to hide in an empty string.
+  if (S.empty())
+    return S;
+
+  // Reversible and irreversible results are cached separately because the same
+  // input maps to a different name in each mode.
+  MapTy &Cache = Reverse ? ForwardMap : IrreversibleForwardMap;
+
+  // Reuse the earlier result so anonymizeImpl() runs once per unique input.
+  auto Known = Cache.find(S);
+  if (Known != Cache.end())
+    return Known->second;
+
+  StringRef Res = anonymizeImpl(S, Reverse, KeepPrefix);
+  Cache[S] = Res;
+  return Res;
+}
+
+Expected<StringRef> Anonymizer::lookupForwardMap(StringRef Symbol) const {
+  auto Known = ForwardMap.find(Symbol);
+  if (Known != ForwardMap.end())
+    return Known->second;
+
+  return makeNoEntryError(Symbol);
+}
+
+void Anonymizer::anchor() {}
+void IncrementAnonymizer::anchor() {}
+
+StringRef IncrementAnonymizer::anonymizeImpl(StringRef S, bool Reverse,
+                                             bool KeepPrefix) {
+  SmallString<128> NextVal;
+  raw_svector_ostream OS(NextVal);
+
+  if (!Reverse) {
+    // Irreversible names draw from their own counter and are never recorded.
+    OS << IRPrefix << IrNum++ << Suffix;
+    return copyString(OS.str());
+  }
+
+  // If symbol begins with l or L, keep that prefix.
+  if (KeepPrefix && (S.startswith("L") || S.startswith("l")))
+    OS << S.substr(0, 1);
+  OS << Prefix << Num++ << Suffix;
+
+  // The original name is stored at the index that was just embedded in the
+  // anonymized name, which is what lookupImpl() relies on.
+  ReverseMap.push_back(copyString(S));
+  assert(Num == ReverseMap.size() && "ReverseMap has wrong size");
+
+  return copyString(OS.str());
+}
+
+Expected<StringRef> IncrementAnonymizer::lookupImpl(StringRef Str) const {
+  Expected<size_t> Index = findIndex(Str);
+  if (!Index)
+    return Index.takeError();
+
+  if (*Index >= ReverseMap.size())
+    return makeNoEntryError(Str);
+
+  return ReverseMap[*Index];
+}
+
+void IncrementAnonymizer::writeReverseMapImpl(raw_ostream &OS) const {
+  OS << "BCSymbolMap Version: 2.0\n";
+  for (StringRef S : ReverseMap) {
+    assert(!S.empty() && "failed to initialize a member");
+    OS << S << '\n';
+  }
+}
+
+Error IncrementAnonymizer::readReverseMapImpl(MemoryBuffer *Buffer) {
+  StringRef Data(Buffer->getBufferStart(), Buffer->getBufferSize());
+
+  // An empty buffer describes an empty map; do not invent an entry for it.
+  if (Data.empty())
+    return Error::success();
+
+  // Check version string first.
+  StringRef FirstLine;
+  std::tie(FirstLine, Data) = Data.split('\n');
+  if (FirstLine.startswith("BCSymbolMap Version:")) {
+    StringRef VersionStr = FirstLine.split(':').second.ltrim();
+    if (VersionStr != "1.0" && VersionStr != "2.0")
+      return make_error<StringError>(
+          formatv("Symbol map version '{0}' is not supported.", VersionStr),
+          inconvertibleErrorCode());
+  } else {
+    // Version string not present, assume version 1.0, where the first line is
+    // already a symbol.
+    ReverseMap.emplace_back(copyString(FirstLine));
+    ++Num;
+  }
+
+  // Every remaining line is one symbol; its position is its index.
+  while (!Data.empty()) {
+    StringRef Line;
+    std::tie(Line, Data) = Data.split('\n');
+    ReverseMap.emplace_back(copyString(Line));
+    ++Num;
+  }
+
+  return Error::success();
+}
+
+Expected<size_t> IncrementAnonymizer::findIndex(StringRef Key) const {
+  const size_t PrefixIdx = Key.find(Prefix);
+  if (PrefixIdx == StringRef::npos)
+    return make_error<StringError>(
+        formatv("Invalid key '{0}' does not contain prefix '{1}'", Key, Prefix),
+        inconvertibleErrorCode());
+
+  // The suffix has to come after the prefix. This matters when the suffix also
+  // occurs inside the prefix (e.g. both are "_"): rfind() then still succeeds
+  // on a key with no trailing suffix, and the index length below would wrap.
+  const size_t IndexBegin = PrefixIdx + Prefix.size();
+  const size_t SuffixIdx = Key.rfind(Suffix);
+  if (SuffixIdx == StringRef::npos || SuffixIdx < IndexBegin)
+    return make_error<StringError>(
+        formatv("Invalid key '{0}' does not contain suffix '{1}'", Key, Suffix),
+        inconvertibleErrorCode());
+
+  const StringRef Digits = Key.slice(IndexBegin, SuffixIdx);
+
+  unsigned long long Ret;
+  if (getAsUnsignedInteger(Digits, 10, Ret))
+    return make_error<StringError>(
+        formatv("Unable to parse index from key '{0}'", Key),
+        inconvertibleErrorCode());
+  return Ret;
+}
Index: llvm/lib/Support/CMakeLists.txt
===================================================================
--- llvm/lib/Support/CMakeLists.txt
+++ llvm/lib/Support/CMakeLists.txt
@@ -37,6 +37,7 @@
 
 add_llvm_library(LLVMSupport
   AMDGPUMetadata.cpp
+  Anonymization.cpp
   APFloat.cpp
   APInt.cpp
   APSInt.cpp
Index: llvm/unittests/Support/AnonymizationTest.cpp
===================================================================
--- /dev/null
+++ llvm/unittests/Support/AnonymizationTest.cpp
@@ -0,0 +1,211 @@
+//===---------- llvm/unittest/Support/AnonymizationTest.cpp ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Anonymization.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include <array>
+#include <memory>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+static constexpr std::array<const char *, 3> Hidden = {
+    {"__hidden#0_", "__hidden#1_", "__hidden#2_"}};
+static constexpr std::array<const char *, 3> IRHidden = {
+    {"__ir_hidden#0_", "__ir_hidden#1_", "__ir_hidden#2_"}};
+static constexpr std::array<const char *, 3> Symbol = {{"foo", "bar", "baz"}};
+
+/// Expect \p Key to be rejected by \p A and discard the resulting error.
+static void expectLookupFails(const Anonymizer &A, StringRef Key) {
+  auto Result = A.lookup(Key);
+  EXPECT_FALSE(Result.operator bool()) << Key.str();
+  llvm::consumeError(Result.takeError());
+}
+
+/// Expect \p Key to map back to \p Want. A failed lookup is reported as a test
+/// failure and its error consumed, rather than dereferencing an error state.
+static void expectLookup(const Anonymizer &A, StringRef Key, StringRef Want) {
+  auto Result = A.lookup(Key);
+  if (!Result) {
+    ADD_FAILURE() << "lookup failed for '" << Key.str() << "'";
+    llvm::consumeError(Result.takeError());
+    return;
+  }
+  EXPECT_EQ(Want, *Result);
+}
+
+// Each anonymize test below runs two passes: the second pass must be served
+// from the cache and return the same names instead of advancing the counter.
+
+TEST(AnonymizationTest, anonymizeSimple) {
+  IncrementAnonymizer Anonymizer;
+  for (unsigned Pass = 0; Pass < 2; ++Pass)
+    for (size_t I = 0; I < Symbol.size(); ++I)
+      EXPECT_EQ(StringRef(IRHidden[I]), Anonymizer.anonymize(Symbol[I]));
+}
+
+TEST(AnonymizationTest, anonymizeReverse) {
+  IncrementAnonymizer Anonymizer;
+  for (unsigned Pass = 0; Pass < 2; ++Pass)
+    for (size_t I = 0; I < Symbol.size(); ++I)
+      EXPECT_EQ(StringRef(Hidden[I]), Anonymizer.anonymize(Symbol[I], true));
+}
+
+TEST(AnonymizationTest, anonymizeKeepPrefix) {
+  IncrementAnonymizer Anonymizer;
+  for (unsigned Pass = 0; Pass < 2; ++Pass)
+    for (size_t I = 0; I < Symbol.size(); ++I)
+      EXPECT_EQ(StringRef(Hidden[I]),
+                Anonymizer.anonymize(Symbol[I], true, true));
+}
+
+TEST(AnonymizationTest, anonymizeKeepPrefixl) {
+  IncrementAnonymizer Anonymizer;
+  for (unsigned Pass = 0; Pass < 2; ++Pass)
+    for (size_t I = 0; I < Symbol.size(); ++I)
+      EXPECT_EQ(
+          (Twine("l") + Hidden[I]).str(),
+          Anonymizer.anonymize((Twine("l") + Symbol[I]).str(), true, true));
+}
+
+TEST(AnonymizationTest, anonymizeKeepPrefixL) {
+  IncrementAnonymizer Anonymizer;
+  for (unsigned Pass = 0; Pass < 2; ++Pass)
+    for (size_t I = 0; I < Symbol.size(); ++I)
+      EXPECT_EQ(
+          (Twine("L") + Hidden[I]).str(),
+          Anonymizer.anonymize((Twine("L") + Symbol[I]).str(), true, true));
+}
+
+TEST(AnonymizationTest, lookupInvalid) {
+  IncrementAnonymizer Anonymizer;
+  EXPECT_EQ("__ir_hidden#0_", Anonymizer.anonymize("foo"));
+  EXPECT_EQ("__hidden#0_", Anonymizer.anonymize("foo", true));
+  EXPECT_EQ("__hidden#1_", Anonymizer.anonymize("bar", true));
+
+  // We didn't set the 'reverse' flag so we get an __ir_hidden which we cannot
+  // reverse.
+  expectLookupFails(Anonymizer, "__ir_hidden#0_");
+
+  // Make sure we don't accept broken input.
+  expectLookupFails(Anonymizer, "__ir_hidden0_");
+  expectLookupFails(Anonymizer, "__hidden#1");
+  expectLookupFails(Anonymizer, "__ir_hidden#2_");
+
+  // Make sure we don't accept invalid indices.
+  expectLookupFails(Anonymizer, "__hidden#2_");
+}
+
+TEST(AnonymizationTest, lookupValid) {
+  IncrementAnonymizer Anonymizer;
+  EXPECT_EQ("__hidden#0_", Anonymizer.anonymize("foo", true));
+  EXPECT_EQ("__hidden#1_", Anonymizer.anonymize("bar", true));
+
+  // This time we did set the reverse flag, so we should be able to find the
+  // original string.
+  expectLookup(Anonymizer, "__hidden#0_", "foo");
+  expectLookup(Anonymizer, "__hidden#1_", "bar");
+}
+
+TEST(AnonymizationTest, customPrefixSuffix) {
+  IncrementAnonymizer Anonymizer("_", "_");
+  EXPECT_EQ("_0_", Anonymizer.anonymize("foo", true));
+  EXPECT_EQ("_1_", Anonymizer.anonymize("bar", true));
+
+  // Ensure we accept input with the same prefix and suffix.
+  expectLookup(Anonymizer, "_0_", "foo");
+  expectLookup(Anonymizer, "_1_", "bar");
+
+  // Ensure we reject missing prefix.
+  expectLookupFails(Anonymizer, "0_");
+
+  // Ensure we reject missing suffix. Here the only "_" is the prefix itself,
+  // so the suffix search must not be allowed to match it.
+  expectLookupFails(Anonymizer, "_0");
+
+  // Ensure we reject missing prefix and suffix.
+  expectLookupFails(Anonymizer, "0");
+}
+
+TEST(AnonymizationTest, ownsIRPrefix) {
+  std::string IRPrefix = "__ir#";
+  IncrementAnonymizer Anonymizer("__hidden#", "_", IRPrefix);
+
+  // The anonymizer must keep its own copy; clobbering the caller's storage
+  // afterwards may not change the generated names.
+  IRPrefix.assign("XXXXX");
+  EXPECT_EQ("__ir#0_", Anonymizer.anonymize("foo"));
+}
+
+TEST(AnonymizationTest, serialization) {
+  IncrementAnonymizer Anonymizer;
+  EXPECT_EQ("__hidden#0_", Anonymizer.anonymize("foo", true));
+  EXPECT_EQ("__hidden#1_", Anonymizer.anonymize("bar", true));
+  EXPECT_EQ("__hidden#2_", Anonymizer.anonymize("baz", true));
+
+  std::string Expected = "BCSymbolMap Version: 2.0\nfoo\nbar\nbaz\n";
+  std::string Serialized;
+
+  llvm::raw_string_ostream OS(Serialized);
+  Anonymizer.writeReverseMap(OS);
+  OS.flush();
+
+  EXPECT_EQ(Expected, Serialized);
+}
+
+TEST(AnonymizationTest, deserialization) {
+  std::string Serialized = "BCSymbolMap Version: 2.0\nfoo\nbar\nbaz\n";
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(Serialized);
+
+  IncrementAnonymizer Anonymizer;
+  Error E = Anonymizer.readReverseMap(Buffer.get());
+  EXPECT_FALSE(E.operator bool());
+  llvm::consumeError(std::move(E));
+
+  for (size_t I = 0; I < Symbol.size(); ++I)
+    expectLookup(Anonymizer, Hidden[I], Symbol[I]);
+}
+
+TEST(AnonymizationTest, deserializationInvalidVersion) {
+  std::string Serialized = "BCSymbolMap Version: 3.0\nfoo\nbar\nbaz\n";
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(Serialized);
+
+  IncrementAnonymizer Anonymizer;
+  Error E = Anonymizer.readReverseMap(Buffer.get());
+  EXPECT_TRUE(E.operator bool());
+  llvm::consumeError(std::move(E));
+}
+
+TEST(AnonymizationTest, deserializationNoVersion) {
+  std::string Serialized = "foo\nbar\nbaz\n";
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(Serialized);
+
+  IncrementAnonymizer Anonymizer;
+  Error E = Anonymizer.readReverseMap(Buffer.get());
+  EXPECT_FALSE(E.operator bool());
+  llvm::consumeError(std::move(E));
+
+  for (size_t I = 0; I < Symbol.size(); ++I)
+    expectLookup(Anonymizer, Hidden[I], Symbol[I]);
+}
+
+TEST(AnonymizationTest, deserializationEmpty) {
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer("");
+
+  IncrementAnonymizer Anonymizer;
+  Error E = Anonymizer.readReverseMap(Buffer.get());
+  EXPECT_FALSE(E.operator bool());
+  llvm::consumeError(std::move(E));
+
+  // An empty map has no entry for any index.
+  expectLookupFails(Anonymizer, "__hidden#0_");
+}
Index: llvm/unittests/Support/CMakeLists.txt
===================================================================
--- llvm/unittests/Support/CMakeLists.txt
+++ llvm/unittests/Support/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_llvm_unittest(SupportTests
   AlignOfTest.cpp
   AllocatorTest.cpp
+  AnonymizationTest.cpp
   ARMAttributeParser.cpp
   ArrayRecyclerTest.cpp
   BinaryStreamTest.cpp