Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
===================================================================
--- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -705,6 +705,8 @@
 
   /// \param [in] A - The first IRInstructionCandidate to compare.
   /// \param [in] B - The second IRInstructionCandidate to compare.
+  /// \param [in,out] OneToOne - A mapping of value numbers from candidate
+  /// \p A to candidate \B using the structure of the original instructions.
   /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
   /// candidate \p A to candidate \B.
   /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
@@ -714,6 +716,7 @@
   static bool
   compareStructure(const IRSimilarityCandidate &A,
                    const IRSimilarityCandidate &B,
+                   DenseMap<unsigned, unsigned> &OneToOne,
                    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
                    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
 
@@ -820,7 +823,8 @@
   /// the canonical ordering in \p SourceCand. These are defined based on the
   /// found mappings in \p ToSourceMapping and \p FromSourceMapping.  Both of
   /// these relationships should have the same information, just in opposite
-  /// directions.
+  /// directions.  This has some amount of non-determinism, as there is no
+  /// fallback if a two pairs GVNs match one another.
   ///
   /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
   /// canonical numbering from.
@@ -832,6 +836,49 @@
       IRSimilarityCandidate &SourceCand,
       DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
       DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+  
+  /// Create a mapping for the value numbering of the calling
+  /// IRSimilarityCandidate, to a different separate set of numbers, based on
+  /// the canonical ordering in \p SourceCand. These are defined based on the
+  /// found mappings in \p ToSourceMapping and \p FromSourceMapping.  Both of
+  /// these relationships should have the same information, just in opposite
+  /// directions.  Uses the \p OneToOne mapping from target candidate to \p
+  /// SourceCand GVNs to determine the mapping first for values with multiple
+  /// mappings.  This mapping is created by the ordering of operands in the
+  /// instruction they are first seen in the candidates.
+  ///
+  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+  /// canonical numbering from.
+  /// \param [in,out] OneToOne - A mapping of value numbers from candidate
+  /// \p A to candidate \B using the structure of the original instructions.
+  /// \param ToSourceMapping - The mapping of value numbers from this candidate
+  /// to \p SourceCand.
+  /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
+  /// to this candidate.
+  void createCanonicalRelationFrom(
+      IRSimilarityCandidate &SourceCand,
+      DenseMap<unsigned, unsigned> &OneToOne,
+      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+  
+  /// Create a mapping for the value numbering of the calling
+  /// IRSimilarityCandidate, to a different separate set of numbers, based on
+  /// the canonical ordering in \p SourceCand. These are defined based on the
+  /// canonical mapping defined between \p SoureCandLarge and
+  /// \p TargetCandLarge.  These IRSimilarityCandidates are already structurally
+  /// similar, ad fully encasulate the IRSimilarityCandidates in question.
+  /// These are used as a "bridge" from the \p SourceCand to the target.
+  ///
+  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+  /// canonical numbering from.
+  /// \param SoureCandLarge - The IRSimilarityCandidate fully containing
+  /// \p SourceCand.
+  /// \param TargetCandLarge -  The IRSimilarityCandidate fully containing
+  /// this Candidate.
+  void createCanonicalRelationFrom(
+      IRSimilarityCandidate &SourceCand,
+      IRSimilarityCandidate &SourceCandLarge,
+      IRSimilarityCandidate &TargetCandLarge);
 
   /// \param [in,out] BBSet - The set to track the basic blocks.
   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp
===================================================================
--- llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -754,7 +754,9 @@
                                              const IRSimilarityCandidate &B) {
   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
-  return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
+  DenseMap<unsigned, unsigned> OneToOne;
+  return IRSimilarityCandidate::compareStructure(A, B, OneToOne, MappingA,
+                                                 MappingB);
 }
 
 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
@@ -762,8 +764,13 @@
                       ArrayRef<Value *> &>
     ZippedRelativeLocationsT;
 
+typedef detail::zippy<detail::zip_shortest, ArrayRef<Value *> &,
+                      ArrayRef<Value *> &>
+    ZippedOperValsT;
+
 bool IRSimilarityCandidate::compareStructure(
     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
+    DenseMap<unsigned, unsigned> &OneToOne,
     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
   if (A.getLength() != B.getLength())
@@ -814,6 +821,13 @@
         std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
     if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
       return false;
+    
+    ZippedOperValsT ZippedOperVals = zip(OperValsA, OperValsB);
+    for (const std::tuple<Value *, Value *> &OperPair : ZippedOperVals) {
+      unsigned GVNSource = A.ValueToNumber.find(std::get<0>(OperPair))->second;
+      unsigned GVNTarg = B.ValueToNumber.find(std::get<1>(OperPair))->second;
+      OneToOne.insert(std::make_pair(GVNTarg, GVNSource));
+    }
 
     // We have different paths for commutative instructions and non-commutative
     // instructions since commutative instructions could allow multiple mappings
@@ -988,6 +1002,16 @@
     IRSimilarityCandidate &SourceCand,
     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
+    DenseMap<unsigned, unsigned> OneToOne;
+    createCanonicalRelationFrom(SourceCand, OneToOne, ToSourceMapping,
+                                FromSourceMapping);
+}
+
+void IRSimilarityCandidate::createCanonicalRelationFrom(
+    IRSimilarityCandidate &SourceCand,
+    DenseMap<unsigned, unsigned> &OneToOne,
+    DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+    DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
   assert(SourceCand.CanonNumToNumber.size() != 0 &&
          "Base canonical relationship is empty!");
   assert(SourceCand.NumberToCanonNum.size() != 0 &&
@@ -1012,7 +1036,24 @@
     // here to ensure a one-to-one mapping.
     if (GVNMapping.second.size() > 1) {
       bool Found = false;
+      DenseMap<unsigned, unsigned>::iterator OTOIt = OneToOne.find(SourceGVN);
+
+      // If possible use the One-to-One mapping from the original instructions
+      // to determine the mapping between global value numbers.
+      if (OTOIt != OneToOne.end()) {
+        DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
+            FromSourceMapping.find(OTOIt->second);
+
+        if (It->second.contains(SourceGVN)) {
+          Found = true;
+          ResultGVN = OTOIt->second;
+        }
+      }
+
       for (unsigned Val : GVNMapping.second) {
+        if (Found)
+          break;
+
         // We make sure the target value number hasn't already been reserved.
         if (UsedGVNs.contains(Val))
           continue;
@@ -1079,6 +1120,94 @@
   }
 }
 
+void IRSimilarityCandidate::createCanonicalRelationFrom(
+    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
+    IRSimilarityCandidate &TargetCandLarge) {
+  assert(SourceCand.CanonNumToNumber.size() != 0 &&
+         "Canonical Relationship is non-empty");
+  assert(SourceCand.NumberToCanonNum.size() != 0 &&
+         "Canonical Relationship is non-empty");
+
+  assert(SourceCandLarge.CanonNumToNumber.size() != 0 &&
+         "Canonical Relationship is non-empty");
+  assert(SourceCandLarge.NumberToCanonNum.size() != 0 &&
+         "Canonical Relationship is non-empty");
+  
+  assert(TargetCandLarge.CanonNumToNumber.size() != 0 &&
+         "Canonical Relationship is non-empty");
+  assert(TargetCandLarge.NumberToCanonNum.size() != 0 &&
+         "Canonical Relationship is non-empty");
+
+  assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
+  assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
+
+  // We're going to use the larger candidates as a "bridge" to create the
+  // canonical number for the target candidate since we have idetified two
+  // candidates as subsequences of larger sequences, and therefore must be
+  // structurally similar.
+  for (std::pair<Value *, unsigned> ValueNumPair : ValueToNumber) {
+    Value *CurrVal = ValueNumPair.first;
+    unsigned TargetCandGVN = ValueNumPair.second;
+
+    // Find the numbering in the large candidate that surrounds the 
+    // current candidate.
+    Optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
+    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
+
+    // Get the canonical numbering in the large target candidate.
+    Optional<unsigned> OTargetCandCanon =
+        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
+    assert(OTargetCandCanon.has_value() &&
+           "Canononical Number not found for GVN");
+    
+    // Get the GVN in the large source candidate from the canonical numbering.
+    Optional<unsigned> OLargeSourceGVN =
+        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
+    assert(OLargeSourceGVN.has_value() &&
+           "GVN Number not found for Canonical Number");
+    
+    // Get the Value from the GVN in the large source candidate.
+    Optional<Value *> OLargeSourceV =
+        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
+    assert(OLargeSourceV.has_value() && "Value not found for GVN");
+
+    // Get the GVN number for the Value in the source candidate.
+    Optional<unsigned> OSourceGVN =
+        SourceCand.getGVN(OLargeSourceV.value());
+    if (!OSourceGVN.has_value()) {
+      TargetCandLarge.frontInstruction()->getParent()->getParent()->getParent()->dump();
+      errs() << "------\n";
+      CurrVal->dump();
+      TargetCandLarge.frontInstruction()->dump();
+      TargetCandLarge.backInstruction()->dump();
+      frontInstruction()->dump();
+      backInstruction()->dump();
+      errs() << "------\n";
+      errs() << "------\n";
+      OLargeSourceV.value()->dump();
+      SourceCandLarge.frontInstruction()->dump();
+      SourceCandLarge.backInstruction()->dump();
+      SourceCand.frontInstruction()->dump();
+      SourceCand.backInstruction()->dump();
+      errs() << "------\n";
+      
+    }
+    assert(OSourceGVN.has_value() && "GVN Number not found for Value");
+
+    // Get the canonical numbering from the GVN/
+    Optional<unsigned> OSourceCanon =
+        SourceCand.getCanonicalNum(OSourceGVN.value());
+    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
+
+    // Insert the canonical numbering and GVN pair into their respective
+    // mappings.
+    CanonNumToNumber.insert(
+        std::make_pair(OSourceCanon.value(), TargetCandGVN));
+    NumberToCanonNum.insert(
+        std::make_pair(TargetCandGVN, OSourceCanon.value()));
+  }
+}
+
 void IRSimilarityCandidate::createCanonicalMappingFor(
     IRSimilarityCandidate &CurrCand) {
   assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1096,6 +1225,76 @@
   }
 }
 
+/// Look for larger IRSimilarityCandidates From the previously matched
+/// IRSimilarityCandidates that fully contain \p CandA or \p CandB.  If there is
+/// an overlap, return a pair of structurally similar, larger
+/// IRSimilarityCandidates.
+///
+/// \param [in] CandA - The first candidate we are trying to determine the
+/// structure of.
+/// \param [in] CandB - The second candidate we are trying to determine the
+/// structure of.
+/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
+static Optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
+CheckLargerCands(
+    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
+    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
+    DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
+  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
+  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
+  DenseSet<unsigned> IncludedGroupsA;
+  DenseSet<unsigned> IncludedGroupsB;
+
+  // Find the overall similarity group numbers that fully contain the candidate,
+  // and record the larger candidate for each group.
+  DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator IdxIt;
+  IdxIt = IndexToIncludedCand.find(CandA.getStartIdx());
+  Optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> Result;
+  if (IdxIt == IndexToIncludedCand.end())
+    return Result;
+  for (IRSimilarityCandidate *MatchedCand : IdxIt->second) {
+    if (MatchedCand->getStartIdx() > CandA.getStartIdx() ||
+        (MatchedCand->getEndIdx() < CandA.getEndIdx()))
+      continue;
+    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+    IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
+    IncludedGroupsA.insert(GroupNum);
+  }
+
+  // Find the overall similarity group numbers that fully contain the next
+  // candidate, and record the larger candidate for each group.
+  IdxIt = IndexToIncludedCand.find(CandB.getStartIdx());
+  if (IdxIt == IndexToIncludedCand.end())
+    return Result;
+  for (IRSimilarityCandidate *MatchedCand : IdxIt->second) {
+    if (MatchedCand->getStartIdx() > CandB.getStartIdx() ||
+        MatchedCand->getEndIdx() < CandB.getEndIdx())
+      continue;
+    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+    IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
+    IncludedGroupsB.insert(GroupNum);
+  }
+
+  // Find the intersection between the two groups, these are the groups where
+  // the larger candidates exist.
+  set_intersect(IncludedGroupsA, IncludedGroupsB);
+
+  // If there is no intersection between the sets, then we cannot determine
+  // whether or not there is a match.
+  if (IncludedGroupsA.empty())
+    return Result;
+  
+  // Create a pair that contains the larger candidates.
+  DenseMap<unsigned, IRSimilarityCandidate *>::iterator ItA, ItB;
+  ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
+  ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
+  Result = std::make_pair(ItA->second, ItB->second);
+  return Result;
+}
+
 /// From the list of IRSimilarityCandidates, perform a comparison between each
 /// IRSimilarityCandidate to determine if there are overlapping
 /// IRInstructionData, or if they do not have the same structure.
@@ -1105,9 +1304,16 @@
 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 /// vector are structurally similar to one another.
+/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
 static void findCandidateStructures(
     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
-    DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
+    DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
+    DenseMap<unsigned,  DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
+    DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
+    ) {
   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
       InnerCandEndIt;
 
@@ -1133,6 +1339,7 @@
   // compatibility with other instructions
   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
+  DenseMap<unsigned, unsigned> OneToOne;
   for (CandIt = CandsForRepSubstring.begin(),
       CandEndIt = CandsForRepSubstring.end();
        CandIt != CandEndIt; CandIt++) {
@@ -1170,17 +1377,37 @@
       if (CandToGroupItInner != CandToGroup.end())
         continue;
 
+      // Check if we have found structural similarity between two candidates
+      // that fully contains the first and second candidates.
+      Optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
+          LargerPair = CheckLargerCands(
+              *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
+
+      // If a pair was found, it means that we can assume that these smaller
+      // substrings are also structurally similar.  Use the larger candidates to
+      // determine the canonical mapping between the two sections.
+      if (LargerPair.has_value()) {
+        SameStructure = true;
+        InnerCandIt->createCanonicalRelationFrom(
+            *CandIt, *LargerPair.value().first, *LargerPair.value().second);
+        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
+        CurrentGroupPair->second.push_back(*InnerCandIt);
+        continue;
+      }
+
       // Otherwise we determine if they have the same structure and add it to
       // vector if they match.
       ValueNumberMappingA.clear();
       ValueNumberMappingB.clear();
+      OneToOne.clear();
       SameStructure = IRSimilarityCandidate::compareStructure(
-          *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
+          *CandIt, *InnerCandIt, OneToOne, ValueNumberMappingA,
+          ValueNumberMappingB);
       if (!SameStructure)
         continue;
 
-      InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
-                                               ValueNumberMappingB);
+      InnerCandIt->createCanonicalRelationFrom(
+          *CandIt, OneToOne, ValueNumberMappingA, ValueNumberMappingB);
       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
       CurrentGroupPair->second.push_back(*InnerCandIt);
     }
@@ -1196,24 +1423,58 @@
   std::vector<SimilarityGroup> NewCandidateGroups;
 
   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
+  DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
+  DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 
 
   // Iterate over the subsequences found by the Suffix Tree to create
   // IRSimilarityCandidates for each repeated subsequence and determine which
   // instances are structurally similar to one another.
-  for (SuffixTree::RepeatedSubstring &RS : ST) {
+
+  // Sort the suffix tree from longest substring to shortest.
+  std::vector<SuffixTree::RepeatedSubstring> RSes;
+  for (SuffixTree::RepeatedSubstring &RS : ST)
+    RSes.push_back(RS);
+
+  llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
+                             const SuffixTree::RepeatedSubstring &RHS) {
+    return LHS.Length > RHS.Length;
+  });
+  for (SuffixTree::RepeatedSubstring &RS : RSes) {
     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
                                    CandsForRepSubstring);
 
     if (CandsForRepSubstring.size() < 2)
       continue;
 
-    findCandidateStructures(CandsForRepSubstring, StructuralGroups);
-    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
+    findCandidateStructures(CandsForRepSubstring, StructuralGroups,
+                            IndexToIncludedCand, CandToGroup);
+    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
       // We only add the group if it contains more than one
       // IRSimilarityCandidate.  If there is only one, that means there is no
       // other repeated subsequence with the same structure.
-      if (Group.second.size() > 1)
+      if (Group.second.size() > 1) {
         SimilarityCandidates->push_back(Group.second);
+        // Iterate over each candidate in the group, and add an entry for each
+        // instruction included with a mapping to a set of
+        // IRSimilarityCandidates that include that instruction.
+        for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
+          for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
+               Idx <= Edx; ++Idx) {
+            DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
+                IdIt;
+            IdIt = IndexToIncludedCand.find(Idx);
+            bool Inserted = false;
+            if (IdIt == IndexToIncludedCand.end())
+              std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
+                  std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
+            IdIt->second.insert(&IRCand);
+          }
+          // Add mapping of candidate to the overall similarity group number.
+          CandToGroup.insert(
+              std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
+        }
+      }
+    }
 
     CandsForRepSubstring.clear();
     StructuralGroups.clear();