Index: llvm/include/llvm/Support/Anonymization.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Support/Anonymization.h
@@ -0,0 +1,164 @@
+//===-- Anonymization.h - String anonymization helper -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides helper classes to perform string anonymization.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ANONYMIZATION_H
+#define LLVM_SUPPORT_ANONYMIZATION_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+
+/// Base class providing basic anonymization capability.
+///
+/// The base class owns the allocator backing the strings it hands out and
+/// caches every result, so anonymizing the same input again yields the same
+/// output. Derived classes are to implement anonymizeImpl and the reverse-map
+/// hooks.
+class Anonymizer {
+  using MapTy = StringMap<StringRef, BumpPtrAllocator &>;
+
+public:
+  Anonymizer()
+      : Alloc(), ForwardMap(64, Alloc), IrreversibleForwardMap(32, Alloc) {}
+
+  Anonymizer(const Anonymizer &) = delete;
+  Anonymizer &operator=(const Anonymizer &) = delete;
+
+  /// Anonymize a string, remember in the reverse map if reverse is true.
+  /// Keep 'l' or 'L' prefix when keepPrefix is true. An empty string is
+  /// returned unchanged. Reversible and irreversible requests are cached
+  /// separately.
+  StringRef anonymize(StringRef Src, bool reverse = false,
+                      bool keepPrefix = false);
+
+  /// Reverse lookup a string in the map.
+  Expected<StringRef> lookup(StringRef Str) const { return lookupImpl(Str); }
+
+  /// Write out the reverse map in textual form.
+  ///
+  /// Derived classes may wish to use a different format.
+  void writeReverseMap(raw_ostream &OS) const { writeReverseMapImpl(OS); }
+
+  /// Read back the reverse map from textual form.
+  ///
+  /// Derived classes may wish to use a different format.
+  void readReverseMap(MemoryBuffer *buf) { readReverseMapImpl(buf); }
+
+  // Access the allocator.
+  BumpPtrAllocator &getAllocator() { return Alloc; }
+
+  // As StringRef::copy, but will terminate with null. S must not be empty
+  // (allocateString asserts on a zero length).
+  char *copyString(StringRef S) {
+    char *Ret = allocateString(S.size());
+    std::copy(S.begin(), S.end(), Ret);
+    return Ret;
+  }
+
+  // Look up a symbol in the forward map of reversible names. Returns an error
+  // when there is no entry for it.
+  Expected<StringRef> lookupForwardMap(StringRef Symbol) const;
+
+protected:
+  // Derived classes will implement this method to return the newly anonymized
+  // string. Derived classes are free to track internal state, but are not
+  // required to (the base class tracks the forward mapping and provides an
+  // allocator).
+  //
+  // anonymize() only calls this when the input is not yet in the cache
+  // selected by the reverse flag.
+  virtual StringRef anonymizeImpl(StringRef, bool, bool) = 0;
+
+  // Other virtual implementations that need to be provided by subclasses.
+  virtual Expected<StringRef> lookupImpl(StringRef) const = 0;
+  virtual void writeReverseMapImpl(raw_ostream &OS) const = 0;
+  virtual void readReverseMapImpl(MemoryBuffer *buf) = 0;
+
+  // For subclasses, new up some new memory for a string. Will terminate
+  // the memory with null. Length must be non-zero.
+  char *allocateString(unsigned Length) {
+    assert(Length > 0 && "no length?");
+    char *P = static_cast<char *>(Alloc.Allocate(Length + 1, 1));
+    P[Length] = '\0';
+    return P;
+  }
+
+  virtual ~Anonymizer() {}
+
+private:
+  BumpPtrAllocator Alloc;
+  // Track the mapping, to enforce idempotence under sequential composition.
+  MapTy ForwardMap;
+
+  // The same cache for strings anonymized without the reverse flag.
+  MapTy IrreversibleForwardMap;
+
+  virtual void anchor() = 0;
+};
+
+/// Stateful incrementor anonymizer.
+///
+/// Minimal stateful anonymizer, which tracks the number of anonymized symbols
+/// and places that number between a prefix and a suffix. Thus, to get the same
+/// resulting anonymization, this must be fed the inputs in the same order.
+class IncrementAnonymizer : public Anonymizer {
+  using ReverseMapTy = std::vector<StringRef>;
+  using Anonymizer::Anonymizer;
+
+public:
+  // Prefix and Suffix surround the counter of reversible names; IrPrefix
+  // takes the place of Prefix for names that cannot be reversed. All three
+  // are copied, so the arguments need not outlive the constructor call.
+  IncrementAnonymizer(StringRef Prefix = "__hidden#", StringRef Suffix = "_",
+                      StringRef IrPrefix = "__ir_hidden#")
+      : Anonymizer(), Pre(copyString(Prefix)), Suf(copyString(Suffix)),
+        IrPre(copyString(IrPrefix)) {
+    assert(!Prefix.equals(IrPrefix) &&
+           "Prefix and IrPrefix cannot be the same");
+  }
+
+protected:
+  StringRef anonymizeImpl(StringRef, bool, bool) override;
+  Expected<StringRef> lookupImpl(StringRef) const override;
+  void writeReverseMapImpl(raw_ostream &OS) const override;
+  void readReverseMapImpl(MemoryBuffer *buf) override;
+
+private:
+  StringRef Pre;
+  StringRef Suf;
+  StringRef IrPre;
+  unsigned Num = 0;
+  unsigned IrNum = 0;
+  ReverseMapTy ReverseMap;
+
+  // Extract the number from a key.
+  Expected<unsigned long long> findIndex(StringRef Key) const;
+
+  void anchor() override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ANONYMIZATION_H
Index: llvm/lib/Support/Anonymization.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Support/Anonymization.cpp
@@ -0,0 +1,147 @@
+//===-- Anonymization.cpp - String anonymization helper -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides helper classes to perform string anonymization
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Anonymization.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <tuple>
+
+using namespace llvm;
+
+StringRef Anonymizer::anonymize(StringRef Src, bool reverse, bool keepPrefix) {
+  // Return if the string is empty.
+  if (Src.empty())
+    return Src;
+
+  // Reversible and irreversible names live in separate caches.
+  MapTy &Cache = reverse ? ForwardMap : IrreversibleForwardMap;
+
+  // See if we've already encountered this value.
+  auto Known = Cache.find(Src);
+  if (Known != Cache.end())
+    return Known->getValue();
+
+  StringRef Res = anonymizeImpl(Src, reverse, keepPrefix);
+  Cache[Src] = Res;
+  return Res;
+}
+
+Expected<StringRef> Anonymizer::lookupForwardMap(StringRef Symbol) const {
+  auto Known = ForwardMap.find(Symbol);
+  if (Known != ForwardMap.end())
+    return Known->getValue();
+
+  return make_error<StringError>(formatv("No entry found for '{0}'", Symbol),
+                                 inconvertibleErrorCode());
+}
+
+void IncrementAnonymizer::anchor() {}
+
+StringRef IncrementAnonymizer::anonymizeImpl(StringRef S, bool reverse,
+                                             bool keepPrefix) {
+  SmallString<128> NextVal;
+  raw_svector_ostream OS(NextVal);
+
+  if (!reverse) {
+    OS << IrPre << IrNum++ << Suf;
+    return copyString(OS.str());
+  }
+
+  // If symbol begins with l or L, keep that prefix.
+  if (keepPrefix && (S.startswith("L") || S.startswith("l")))
+    OS << S.substr(0, 1);
+  OS << Pre << Num++ << Suf;
+
+  // Num doubles as the index into ReverseMap, so both advance together.
+  ReverseMap.push_back(copyString(S));
+  assert(Num == ReverseMap.size() && "ReverseMap has wrong size");
+
+  return copyString(OS.str());
+}
+
+Expected<StringRef> IncrementAnonymizer::lookupImpl(StringRef Str) const {
+  auto Index = findIndex(Str);
+  if (!Index)
+    return Index.takeError();
+
+  if (*Index < ReverseMap.size())
+    return ReverseMap[*Index];
+
+  return make_error<StringError>(formatv("No entry found for '{0}'", Str),
+                                 inconvertibleErrorCode());
+}
+
+void IncrementAnonymizer::writeReverseMapImpl(raw_ostream &OS) const {
+  OS << "BCSymbolMap Version: 2.0\n";
+  for (StringRef S : ReverseMap) {
+    assert(S != "" && "failed to initialize a member");
+    OS << S << "\n";
+  }
+}
+
+void IncrementAnonymizer::readReverseMapImpl(llvm::MemoryBuffer *buf) {
+  llvm::StringRef Data(buf->getBufferStart(), buf->getBufferSize());
+  llvm::StringRef LHS;
+  // Check version string first.
+  std::tie(LHS, Data) = Data.split('\n');
+  if (!LHS.startswith("BCSymbolMap Version:")) {
+    // Version string not present, warn but try to parse it: the first line is
+    // then already a symbol.
+    llvm::errs() << "warning: missing version string. Assuming 1.0.\n";
+    ReverseMap.emplace_back(copyString(LHS));
+    ++Num;
+  } else if (!LHS.equals("BCSymbolMap Version: 1.0") &&
+             !LHS.equals("BCSymbolMap Version: 2.0")) {
+    // Only 1.0 and 2.0 (the version writeReverseMapImpl emits) are understood.
+    llvm::StringRef VersionNum = LHS.split(':').second.trim();
+    llvm::errs() << "warning: symbol map version " << VersionNum
+                 << " is not supported. Not symbolication.\n";
+    return;
+  }
+  while (!Data.empty()) {
+    std::tie(LHS, Data) = Data.split('\n');
+    ReverseMap.emplace_back(copyString(LHS));
+    ++Num;
+  }
+}
+
+Expected<unsigned long long>
+IncrementAnonymizer::findIndex(StringRef Key) const {
+  const size_t PreIdx = Key.find(Pre);
+  if (PreIdx == StringRef::npos)
+    return make_error<StringError>(
+        formatv("Invalid key '{0}' does not contain prefix '{1}'", Key, Pre),
+        inconvertibleErrorCode());
+
+  const size_t SufIdx = Key.rfind(Suf);
+  if (SufIdx == StringRef::npos)
+    return make_error<StringError>(
+        formatv("Invalid key '{0}' does not contain suffix '{1}'", Key, Suf),
+        inconvertibleErrorCode());
+
+  // The suffix must not start before the end of the prefix. When the two
+  // share characters (e.g. both are "_") the last suffix match can be the
+  // prefix itself; rejecting that keeps the index length from wrapping.
+  const size_t IndexBegin = PreIdx + Pre.size();
+  if (SufIdx < IndexBegin)
+    return make_error<StringError>(formatv("Invalid key '{0}'", Key),
+                                   inconvertibleErrorCode());
+
+  unsigned long long Ret;
+  if (getAsUnsignedInteger(Key.slice(IndexBegin, SufIdx), 10, Ret))
+    return make_error<StringError>(
+        formatv("Unable to parse index from key '{0}'", Key),
+        inconvertibleErrorCode());
+  return Ret;
+}
Index: llvm/lib/Support/CMakeLists.txt
===================================================================
--- llvm/lib/Support/CMakeLists.txt
+++ llvm/lib/Support/CMakeLists.txt
@@ -37,6 +37,7 @@
 
 add_llvm_library(LLVMSupport
   AMDGPUMetadata.cpp
+  Anonymization.cpp
   APFloat.cpp
   APInt.cpp
   APSInt.cpp
Index: llvm/unittests/Support/AnonymizationTest.cpp
===================================================================
--- /dev/null
+++ llvm/unittests/Support/AnonymizationTest.cpp
@@ -0,0 +1,110 @@
+//===---------- llvm/unittest/Support/AnonymizationTest.cpp ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Anonymization.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+// Expect the lookup of Key to fail, and consume the error it produces.
+static void expectLookupFails(const Anonymizer &A, StringRef Key) {
+  auto Result = A.lookup(Key);
+  EXPECT_FALSE(static_cast<bool>(Result));
+  consumeError(Result.takeError());
+}
+
+// Expect the lookup of Key to succeed and to yield Original.
+static void expectLookupFinds(const Anonymizer &A, StringRef Key,
+                              StringRef Original) {
+  auto Result = A.lookup(Key);
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(Original, *Result);
+}
+
+// Irreversible names count up from zero; asking again returns the same names.
+TEST(AnonymizationTest, anonymizeSimple) {
+  IncrementAnonymizer Anon;
+  for (unsigned Pass = 0; Pass != 2; ++Pass) {
+    EXPECT_EQ("__ir_hidden#0_", Anon.anonymize("foo"));
+    EXPECT_EQ("__ir_hidden#1_", Anon.anonymize("bar"));
+    EXPECT_EQ("__ir_hidden#2_", Anon.anonymize("baz"));
+  }
+}
+
+// Names made with the reverse flag use the __hidden# prefix and are as stable.
+TEST(AnonymizationTest, anonymizeReverse) {
+  IncrementAnonymizer Anon;
+  for (unsigned Pass = 0; Pass != 2; ++Pass) {
+    EXPECT_EQ("__hidden#0_", Anon.anonymize("foo", true));
+    EXPECT_EQ("__hidden#1_", Anon.anonymize("bar", true));
+    EXPECT_EQ("__hidden#2_", Anon.anonymize("baz", true));
+  }
+}
+
+// With keepPrefix set, a leading 'l' or 'L' is carried over to the new name.
+TEST(AnonymizationTest, anonymizeKeepPrefix) {
+  IncrementAnonymizer Anon;
+  EXPECT_EQ("__hidden#0_", Anon.anonymize("foo", true, true));
+  EXPECT_EQ("__hidden#1_", Anon.anonymize("bar", true, true));
+  EXPECT_EQ("__hidden#2_", Anon.anonymize("baz", true, true));
+
+  EXPECT_EQ("l__hidden#3_", Anon.anonymize("lfoo", true, true));
+  EXPECT_EQ("l__hidden#4_", Anon.anonymize("lbar", true, true));
+  EXPECT_EQ("l__hidden#5_", Anon.anonymize("lbaz", true, true));
+
+  EXPECT_EQ("L__hidden#6_", Anon.anonymize("Lfoo", true, true));
+  EXPECT_EQ("L__hidden#7_", Anon.anonymize("Lbar", true, true));
+  EXPECT_EQ("L__hidden#8_", Anon.anonymize("Lbaz", true, true));
+}
+
+TEST(AnonymizationTest, lookupInvalid) {
+  IncrementAnonymizer Anon;
+  EXPECT_EQ("__ir_hidden#0_", Anon.anonymize("foo"));
+  EXPECT_EQ("__hidden#0_", Anon.anonymize("foo", true));
+  EXPECT_EQ("__hidden#1_", Anon.anonymize("bar", true));
+
+  // A name produced without the 'reverse' flag carries the __ir_hidden prefix
+  // and cannot be reversed.
+  expectLookupFails(Anon, "__ir_hidden#0_");
+
+  // Make sure we don't accept broken input.
+  expectLookupFails(Anon, "__ir_hidden0_");
+  expectLookupFails(Anon, "__hidden#1");
+  expectLookupFails(Anon, "__ir_hidden#2_");
+}
+
+TEST(AnonymizationTest, lookupValid) {
+  IncrementAnonymizer Anon;
+  EXPECT_EQ("__hidden#0_", Anon.anonymize("foo", true));
+  EXPECT_EQ("__hidden#1_", Anon.anonymize("bar", true));
+
+  // Both names were made with the reverse flag, so the original strings can
+  // be recovered.
+  expectLookupFinds(Anon, "__hidden#0_", "foo");
+  expectLookupFinds(Anon, "__hidden#1_", "bar");
+}
+
+TEST(AnonymizationTest, customPrefixSuffix) {
+  IncrementAnonymizer Anon("_", "_");
+  EXPECT_EQ("_0_", Anon.anonymize("foo", true));
+  EXPECT_EQ("_1_", Anon.anonymize("bar", true));
+
+  // Ensure we accept input with the same prefix and suffix.
+  expectLookupFinds(Anon, "_0_", "foo");
+  expectLookupFinds(Anon, "_1_", "bar");
+
+  // Ensure we reject missing prefix.
+  expectLookupFails(Anon, "0_");
+
+  // Ensure we reject missing suffix.
+  expectLookupFails(Anon, "_0");
+
+  // Ensure we reject missing prefix and suffix.
+  expectLookupFails(Anon, "0");
+}
Index: llvm/unittests/Support/CMakeLists.txt
===================================================================
--- llvm/unittests/Support/CMakeLists.txt
+++ llvm/unittests/Support/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_llvm_unittest(SupportTests
   AlignOfTest.cpp
   AllocatorTest.cpp
+  AnonymizationTest.cpp
   ARMAttributeParser.cpp
   ArrayRecyclerTest.cpp
   BinaryStreamTest.cpp