Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -381,6 +381,28 @@ /// The \ref isSimilar function compares each IRInstructionData against one /// another and if we have the same sequences of IRInstructionData that would /// create the same hash, we have similar IRSimilarityCandidates. +/// +/// We can also compare the structure of IRSimilarityCandidates. If we can +/// create a mapping of registers in the region contained by one +/// IRSimilarityCandidate to the region contained by different +/// IRSimilarityCandidate, they can be considered structurally similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// Can have the following mapping from candidate to candidate of: +/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 +/// and can be considered similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// We cannot create the same mapping since the use of c4 is not used in the +/// same was as %b or c2. class IRSimilarityCandidate { private: /// The start index of this IRSimilarityCandidate in the instruction list. @@ -419,6 +441,32 @@ /// IRInstructionData in \p B. static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is structurally similar + /// to \p B. + static bool compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + /// Compare the operands in \p A and \p B and check that the current mapping + /// of global value numbers from \p A to \p B and \p B to \A is consistent. + /// + /// \param A - the first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \param OperValsA - The operands in the instruction from \p A. + /// \param OperValsB - The operands in the instruction from \p B. + /// \param ValueNumberMappingA - The current mapping of global values + /// numbering from \p A to \p B. + /// \param ValueNumberMappingB - The current mapping of global values + /// numbering from \p B to \p A. + /// \returns true if the IRSimilarityCandidates operands are compatible. + static bool compareOperandMapping( + const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, + ArrayRef &OperValsA, ArrayRef &OperValsB, + DenseMap> &ValueNumberMappingA, + DenseMap> &ValueNumberMappingB); + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -492,6 +540,7 @@ iterator begin() const { return iterator(front()); } iterator end() const { return std::next(iterator(back())); } }; + } // end namespace IRSimilarity } // end namespace llvm Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -238,6 +238,163 @@ }); } +/// Determine if operand number \p TargetArgVal is in the current mapping set +/// for operand number \p SourceArgVal. +/// +/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global +/// value numbers from source IRSimilarityCandidate to target +/// IRSimilarityCandidate. +/// \param [in] SourceArgVal The global value number for an operand in the +/// in the original candidate. +/// \param [in] TargetArgVal The global value number for the corresponding +/// operand in the other candidate. +/// \returns True if there exists a mapping and false if not. +bool checkNumberingAndReplace( + DenseMap> &CurrentSrcTgtNumberMapping, + unsigned SourceArgVal, unsigned TargetArgVal) { + // We are given two unsigned integers representing the global values of + // the operands in different IRSimilarityCandidates and a current mapping + // between the two. + // + // Source Operand GVN: 1 + // Target Operand GVN: 2 + // CurrentMapping: {1: {1, 2}} + // + // Since we have mapping, and the target operand is contained in the set, we + // update it to: + // CurrentMapping: {1: {2}} + // and can return true. But, if the mapping was + // CurrentMapping: {1: {3}} + // we would return false. + + bool WasInserted; + DenseMap>::iterator Val; + + std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( + std::make_pair(SourceArgVal, DenseSet({TargetArgVal}))); + + // If we created a new mapping, then we are done. + if (WasInserted) + return true; + + // If there is more than one option in the mapping set, and the target value + // is included in the mapping set replace that set with one that only includes + // the target value, as it is the only valid mapping via the non commutative + // instruction. + + DenseSet &TargetSet = Val->second; + if (TargetSet.size() > 1 && TargetSet.find(TargetArgVal) != TargetSet.end()) { + TargetSet.clear(); + TargetSet.insert(TargetArgVal); + return true; + } + + // Return true if we can find the value in the set. + return TargetSet.find(TargetArgVal) != TargetSet.end(); +} + +bool IRSimilarityCandidate::compareOperandMapping( + const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, + ArrayRef &OperValsA, ArrayRef &OperValsB, + DenseMap> &ValueNumberMappingA, + DenseMap> &ValueNumberMappingB) { + // Iterators to keep track of where we are in the operands for each + // Instruction. + ArrayRef::iterator VIt = OperValsA.begin(); + ArrayRef::iterator OVIt = OperValsB.begin(); + unsigned OperandLength = OperValsA.size(); + + // For each operand, get the value numbering and ensure it is consistent. + for (unsigned Idx = 0; Idx < OperandLength; Idx++, VIt++, OVIt++) { + unsigned OperValA = A.ValueToNumber.find(*VIt)->second; + unsigned OperValB = B.ValueToNumber.find(*OVIt)->second; + + // Attempt to add a set with only the target value. If there is no mapping + // we can create it here. + // + // For an instruction like a subtraction: + // IRSimilarityCandidateA: IRSimilarityCandidateB: + // %resultA = sub %a, %b %resultB = sub %d, %e + // + // We map %a -> %d and %b -> %e. + // + // And check to see whether their mapping is consistent in + // checkNumberingAndReplace. + + if (!checkNumberingAndReplace(ValueNumberMappingA, OperValA, OperValB)) + return false; + + if (!checkNumberingAndReplace(ValueNumberMappingB, OperValB, OperValA)) + return false; + } + return true; +} + +bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + if (A.ValueToNumber.size() != B.ValueToNumber.size()) + return false; + + iterator It = A.begin(); + iterator OIt = B.begin(); + + // These sets create a create a mapping between the values in one candidate + // to values in the other candidate. If we create a set with one element, + // and that same element maps to the original element in the candidate + // we have a good mapping. + DenseMap> ValueNumberMapping; + DenseMap> OtherValueNumberMapping; + DenseMap>::iterator ValueMappingIt; + + bool WasInserted; + + // Iterate over the instructions contained in each candidate + unsigned SectionLength = A.getStartIdx() + A.getLength(); + for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; + It++, OIt++, Loc++) { + // Make sure the instructions are similar to one another. + if (!isClose(*It, *OIt)) + return false; + + Instruction *I = (*It).Inst; + Instruction *OI = (*OIt).Inst; + + if (!(*It).Legal || !(*OIt).Legal) + return false; + + // Get the operand sets for the instructions. + ArrayRef OperVals = (*It).OperVals; + ArrayRef OtherOperVals = (*OIt).OperVals; + + unsigned InstVal = A.ValueToNumber.find(I)->second; + unsigned OtherInstVal = B.ValueToNumber.find(OI)->second; + + // Ensure that the mappings for the instructions exists. + std::tie(ValueMappingIt, WasInserted) = ValueNumberMapping.insert( + std::make_pair(InstVal, DenseSet({OtherInstVal}))); + if (!WasInserted && ValueMappingIt->second.find(OtherInstVal) == + ValueMappingIt->second.end()) + return false; + + std::tie(ValueMappingIt, WasInserted) = OtherValueNumberMapping.insert( + std::make_pair(OtherInstVal, DenseSet({InstVal}))); + if (!WasInserted && ValueMappingIt->second.find(OtherInstVal) == + ValueMappingIt->second.end()) + return false; + + // TODO: Handle commutative instructions by mapping one operand to many + // operands instead only mapping a single operand to a single operand. + if (!compareOperandMapping(A, B, OperVals, OtherOperVals, + ValueNumberMapping, OtherValueNumberMapping)) + return false; + } + + return true; +} + bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B) { if (A.StartIdx <= B.getEndIdx() && A.StartIdx >= B.StartIdx) Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -1178,7 +1178,8 @@ // A helper function that accepts an instruction list from a module made up of // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. -static bool longSimCandCompare(std::vector &InstrList) { +static bool longSimCandCompare(std::vector &InstrList, + bool Structure = false) { std::vector::iterator Start, End; @@ -1194,6 +1195,8 @@ std::advance(Start, 3); std::advance(End, 4); IRSimilarityCandidate Cand2(3, 2, *Start, *End); + if (Structure) + return IRSimilarityCandidate::compareStructure(Cand1, Cand2); return IRSimilarityCandidate::isSimilar(Cand1, Cand2); } @@ -1372,3 +1375,93 @@ IRSimilarityCandidate Cand2(3, 3, *Start, *End); ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); } + +// Checks that different structure, in this case, where we introduce a new +// needed input in one region, is recognized as different. +TEST(IRSimilarityCandidate, DifferentStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + %3 = add i32 %b, %0 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList, true)); +} + +// Checks that the same structure is recognized between two candidates. The +// items %a and %b are used in the same way in both sets of instructions. +TEST(IRSimilarityCandidate, SameStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = sub i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + %3 = sub i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true)); +} + +// Checks that the same structure is recognized between two candidates. While +// the input names are reversed, they still perform the same overall operation. +TEST(IRSimilarityCandidate, DifferentNameSameStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true)); +}