Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -37,6 +37,10 @@ // or comparison predicate. These are used to create a hash to map instructions // to integers to be used in similarity matching in sequences of instructions // +// Terminology: +// An IRSimilarityCandidate is a region of IRInstructionData (wrapped +// Instructions), usually used to denote a region of similarity has been found. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H @@ -386,6 +390,137 @@ InstructionClassification InstClassifier; }; +/// This is a class that wraps a range of IRInstructionData from one point to +/// another in the vector of IRInstructionData, which is a region of the +/// program. It is also responsible for defining the structure within this +/// region of instructions. +/// +/// The structure of a region is defined through a value numbering system +/// assigned to each unique value in a region at the creation of the +/// IRSimilarityCandidate. +/// +/// For example, for each Instruction we add a mapping for each new +/// value seen in that Instruction. +/// IR: Mapping Added: +/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 +/// %add2 = add i32 %a, %1 %add2 -> 4 +/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 +/// +/// We can compare IRSimilarityCandidates against one another. +/// The \ref isSimilar function compares each IRInstructionData against one +/// another and if we have the same sequences of IRInstructionData that would +/// create the same hash, we have similar IRSimilarityCandidates. +class IRSimilarityCandidate { +private: + /// The start index of this IRSimilarityCandidate in the instruction list. + unsigned StartIdx = 0; + + /// The number of instructions in this IRSimilarityCandidate. + unsigned Len = 0; + + /// The first instruction in this IRSimilarityCandidate. + IRInstructionData *FirstInst = nullptr; + + /// The last instruction in this IRSimilarityCandidate. + IRInstructionData *LastInst = nullptr; + + /// Global Value Numbering structures + /// @{ + /// Stores the mapping of the value to the number assigned to it in the + /// IRSimilarityCandidate. + DenseMap ValueToNumber; + /// Stores the mapping of the number to the value assigned this number. + DenseMap NumberToValue; + /// @} + +public: + /// \param StartIdx - The starting location of the region. + /// \param StartIdx - The length of the region. + /// \param FirstInstIt - The starting IRInstructionData of the region. + /// \param LastInstIt - The ending IRInstructionData of the region. + IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is similar to every + /// IRInstructionData in \p B. + static bool isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + /// Compare the start and end indices of the two IRSimilarityCandidates for + /// whether they overlap. If the start instruction of one + /// IRSimilarityCandidate is less than the end instruction of the other, and + /// the start instruction of one is greater than the start instruction of the + /// other, they overlap. + /// + /// \returns true if the IRSimilarityCandidates do not have overlapping + /// instructions. + static bool overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + /// \returns the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// \returns the start index of this IRSimilarityCandidate. + unsigned getStartIdx() const { return StartIdx; } + + /// \returns the end index of this IRSimilarityCandidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } + + /// \returns The first IRInstructionData. + IRInstructionData *front() const { return FirstInst; } + /// \returns The last IRInstructionData. + IRInstructionData *back() const { return LastInst; } + + /// \returns The first Instruction. + Instruction *frontInstruction() { return FirstInst->Inst; } + /// \returns The last Instruction + Instruction *backInstruction() { return LastInst->Inst; } + + /// \returns The BasicBlock the IRSimilarityCandidate starts in. + BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } + + /// \returns The Function that the IRSimilarityCandidate is located in. + Function *getFunction() { return getStartBB()->getParent(); } + + /// Finds the positive number associated with \p V if it has been mapped. + /// \param [in] V - the Value to find. + /// \returns The positive number corresponding to the value. + /// \returns None if not present. + Optional getGVN(Value *V) { + assert(V != nullptr && "Value is a nullptr?"); + DenseMap::iterator VNIt = ValueToNumber.find(V); + if (VNIt == ValueToNumber.end()) + return None; + return VNIt->second; + } + + /// Finds the Value associate with \p Num if it exists. + /// \param [in] Num - the number to find. + /// \returns The Value associated with the number. + /// \returns None if not present. + Optional fromGVN(unsigned Num) { + DenseMap::iterator VNIt = NumberToValue.find(Num); + if (VNIt == NumberToValue.end()) + return None; + assert(VNIt->second != nullptr && "Found value is a nullptr!"); + return VNIt->second; + } + + /// \param RHS -The IRSimilarityCandidate to compare against + /// \returns true if the IRSimilarityCandidate is occurs after the + /// IRSimilarityCandidate in the program. + bool operator<(const IRSimilarityCandidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } + + using iterator = IRInstructionDataList::iterator; + iterator begin() const { return iterator(front()); } + iterator end() const { return std::next(iterator(back())); } +}; } // end namespace IRSimilarity } // end namespace llvm Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -163,3 +163,91 @@ return INumber; } + +IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt) + : StartIdx(StartIdx), Len(Len) { + + assert(FirstInstIt != nullptr && "Instruction is nullptr!"); + assert(LastInstIt != nullptr && "Instruction is nullptr!"); + assert(StartIdx + Len > StartIdx && + "Overflow for IRSimilarityCandidate range?"); + assert(Len - 1 == + std::distance(iterator(FirstInstIt), iterator(LastInstIt)) && + "Length of the first and last IRInstructionData do not match the " + "given length"); + + // We iterate over the given instructions, and map each unique value + // to a unique number in the IRSimilarityCandidate ValueToNumber and + // NumberToValue maps. A constant get its own value globally, the individual + // uses of the constants are not considered to be unique. + // + // IR: Mapping Added: + // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 + // %add2 = add i32 %a, %1 %add2 -> 4 + // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 + // + // when replace with global values, starting from 1, would be + // + // 3 = add i32 1, 2 + // 4 = add i32 1, 3 + // 6 = add i32 5, 2 + unsigned LocalValNumber = 1; + IRInstructionDataList::iterator ID = iterator(*FirstInstIt); + for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { + // Map the operand values to an unsigned integer if it does not already + // have an unsigned integer assigned to it. + for (Value *Arg : ID->OperVals) + if (ValueToNumber.find(Arg) == ValueToNumber.end()) { + ValueToNumber.try_emplace(Arg, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, Arg); + LocalValNumber++; + } + + // Mapping the instructions to an unsigned integer if it is not already + // exist in the mapping. + if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { + ValueToNumber.try_emplace(ID->Inst, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, ID->Inst); + LocalValNumber++; + } + } + + // Setting the first and last instruction data pointers for the candidate. If + // we got through the entire for loop without hitting an assert, we know + // that both of these instructions are not nullptrs. + FirstInst = FirstInstIt; + LastInst = LastInstIt; +} + +bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + auto InstrDataForBoth = + zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); + + return all_of(InstrDataForBoth, + [](std::tuple R) { + IRInstructionData &A = std::get<0>(R); + IRInstructionData &B = std::get<1>(R); + if (!A.Legal || !B.Legal) + return false; + return isClose(A, B); + }); +} + +bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + auto DoesOverlap = [](const IRSimilarityCandidate &X, + const IRSimilarityCandidate &Y) { + // Check: + // XXXXXX X starts before Y ends + // YYYYYYY Y starts after X starts + return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; + }; + + return DoesOverlap(A, B) || DoesOverlap(B, A); +} Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "gtest/gtest.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SourceMgr.h" -#include "gtest/gtest.h" using namespace llvm; using namespace IRSimilarity; @@ -30,12 +30,9 @@ return M; } -void getVectors(Module &M, std::vector &InstrList, +void getVectors(Module &M, IRInstructionMapper &Mapper, + std::vector &InstrList, std::vector &UnsignedVec) { - SpecificBumpPtrAllocator InstDataAllocator; - SpecificBumpPtrAllocator IDLAllocator; - IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); - for (Function &F : M) for (BasicBlock &BB : F) Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec); @@ -56,7 +53,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); // Check that the size of the unsigned vector and the instruction list are the // same as a safety check. @@ -84,7 +84,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -109,7 +112,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -131,7 +137,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -154,7 +163,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -176,7 +188,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -199,7 +214,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -222,7 +240,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -245,7 +266,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -268,7 +292,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -291,7 +318,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -314,7 +344,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -337,7 +370,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -360,7 +396,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -383,7 +422,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -406,7 +448,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -429,7 +474,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -452,7 +500,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -475,7 +526,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -498,7 +552,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -521,7 +578,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -544,7 +604,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -567,7 +630,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -590,7 +656,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -613,7 +682,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -636,7 +708,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -673,7 +748,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -697,7 +775,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -717,7 +798,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -741,7 +825,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -763,7 +850,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -793,7 +883,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -823,7 +916,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -849,7 +945,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(3)); @@ -875,7 +974,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -904,7 +1006,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -926,7 +1031,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -948,7 +1056,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -976,7 +1087,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -1004,7 +1118,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -1035,7 +1152,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1063,7 +1183,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1091,7 +1214,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1136,7 +1262,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(16)); @@ -1166,7 +1295,10 @@ std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); // Check that the size of the unsigned vector and the instruction list are the // same as a safety check. @@ -1175,3 +1307,219 @@ // Make sure that the unsigned vector is the expected size. ASSERT_TRUE(UnsignedVec.size() == 6); } + +// A helper function that accepts an instruction list from a module made up of +// two blocks of two legal instructions and terminator, and checks them for +// instruction similarity. +static bool longSimCandCompare(std::vector &InstrList) { + std::vector::iterator Start, End; + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(End, 1); + IRSimilarityCandidate Cand1(0, 2, *Start, *End); + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(Start, 3); + std::advance(End, 4); + IRSimilarityCandidate Cand2(3, 2, *Start, *End); + return IRSimilarityCandidate::isSimilar(Cand1, Cand2); +} + +// Checks that two adds with commuted operands are considered to be the same +// instructions. +TEST(IRSimilarityCandidate, CheckIdenticalInstructions) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(3)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + std::vector::iterator Start, End; + Start = InstrList.begin(); + End = InstrList.begin(); + std::advance(End, 1); + IRSimilarityCandidate Cand1(0, 2, *Start, *End); + IRSimilarityCandidate Cand2(0, 2, *Start, *End); + + ASSERT_TRUE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); +} + +// Checks that IRSimilarityCandidates wrapping these two regions of instructions +// are able to differentiate between instructions that have different opcodes. +TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = sub i32 %a, %b + %3 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList)); +} + +// Checks that IRSimilarityCandidates wrapping these two regions of instructions +// are able to differentiate between instructions that have different types. +TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i64 %c, %d + %3 = add i64 %d, %c + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList)); +} + +// Check that debug instructions do not impact similarity. They are marked as +// invisible. +TEST(IRSimilarityCandidate, IdenticalWithDebug) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + call void @llvm.dbg.value(metadata !0) + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + call void @llvm.dbg.value(metadata !1) + %3 = add i32 %b, %a + ret i32 0 + bb2: + %4 = add i32 %a, %b + %5 = add i32 %b, %a + ret i32 0 + } + + declare void @llvm.dbg.value(metadata) + !0 = distinct !{!"test\00", i32 10} + !1 = distinct !{!"test\00", i32 11})"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(9)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList)); +} + +// Checks that IRSimilarityCandidates that include illegal instructions, are not +// considered to be the same set of instructions. In these sets of instructions +// the allocas are illegal. +TEST(IRSimilarityCandidate, IllegalInCandidate) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %a, %b + %2 = alloca i32 + ret i32 0 + bb1: + %3 = add i32 %a, %b + %4 = add i32 %a, %b + %5 = alloca i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + std::vector::iterator Start, End; + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(End, 2); + IRSimilarityCandidate Cand1(0, 3, *Start, *End); + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(Start, 3); + std::advance(End, 5); + IRSimilarityCandidate Cand2(3, 3, *Start, *End); + ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); +}