Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
===================================================================
--- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -33,6 +33,10 @@
 // or comparison predicate.  These are used to create a hash to map instructions
 // to integers to be used in similarity matching in sequences of instructions
 //
+// Terminology:
+// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
+// Instructions), usually used to denote a region of similarity has been found.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
@@ -359,6 +363,137 @@
   InstructionClassification InstClassifier;
 };
 
+/// This is a class that wraps a range of IRInstructionData from one point to
+/// another in the vector of IRInstructionData, which is a region of the
+/// program.  It is also responsible for defining the structure within this
+/// region of instructions.
+///
+/// The structure of a region is defined through a value numbering system
+/// assigned to each unique value in a region at the creation of the
+/// IRSimilarityCandidate.
+///
+/// For example, for each Instruction we add a mapping for each new
+/// value seen in that Instruction.
+/// IR:                    Mapping Added:
+/// %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
+/// %add2 = add i32 %a, %1    %add2 -> 4
+/// %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
+///
+/// We can compare IRSimilarityCandidates against one another.
+/// The \ref isSimilar function compares each IRInstructionData against one
+/// another and if we have the same sequences of IRInstructionData that would
+/// create the same hash, we have similar IRSimilarityCandidates.
+class IRSimilarityCandidate {
+private:
+  /// The start index of this IRSimilarityCandidate in the instruction list.
+  unsigned StartIdx = 0;
+
+  /// The number of instructions in this IRSimilarityCandidate.
+  unsigned Len = 0;
+
+  /// The first instruction in this IRSimilarityCandidate.
+  IRInstructionData *FirstInst = nullptr;
+
+  /// The last instruction in this IRSimilarityCandidate.
+  IRInstructionData *LastInst = nullptr;
+
+  /// Global Value Numbering structures
+  /// @{
+  /// Stores the mapping of the value to the number assigned to it in the
+  /// IRSimilarityCandidate.
+  DenseMap<Value *, unsigned> ValueToNumber;
+  /// Stores the mapping of the number to the value assigned this number.
+  DenseMap<unsigned, Value *> NumberToValue;
+  /// @}
+
+public:
+  /// \param StartIdx - The starting location of the region.
+  /// \param StartIdx - The length of the region.
+  /// \param FirstInstIt - The starting IRInstructionData of the region.
+  /// \param LastInstIt - The ending IRInstructionData of the region.
+  IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+                        IRInstructionData *FirstInstIt,
+                        IRInstructionData *LastInstIt);
+
+  /// \param A - The first IRInstructionCandidate to compare.
+  /// \param B - The second IRInstructionCandidate to compare.
+  /// \returns True when every IRInstructionData in \p A is similar to every
+  /// IRInstructionData in \p B.
+  static bool isSimilar(const IRSimilarityCandidate &A,
+                        const IRSimilarityCandidate &B);
+  /// Compare the start and end indices of the two IRSimilarityCandidates for
+  /// whether they overlap. If the start instruction of one
+  /// IRSimilarityCandidate is less than the end instruction of the other, and
+  /// the start instruction of one is greater than the start instruction of the
+  /// other, they overlap.
+  ///
+  /// \returns true if the IRSimilarityCandidates do not have overlapping
+  /// instructions.
+  static bool overlap(const IRSimilarityCandidate &A,
+                      const IRSimilarityCandidate &B);
+
+  /// \returns the number of instructions in this Candidate.
+  unsigned getLength() const { return Len; }
+
+  /// \returns the start index of this IRSimilarityCandidate.
+  unsigned getStartIdx() const { return StartIdx; }
+
+  /// \returns the end index of this IRSimilarityCandidate.
+  unsigned getEndIdx() const { return StartIdx + Len - 1; }
+
+  /// \returns The first IRInstructionData.
+  IRInstructionData *front() const { return FirstInst; }
+  /// \returns The last IRInstructionData.
+  IRInstructionData *back() const { return LastInst; }
+
+  /// \returns The first Instruction.
+  Instruction *frontInstruction() { return FirstInst->Inst; }
+  /// \returns The last Instruction
+  Instruction *backInstruction() { return LastInst->Inst; }
+
+  /// \returns The BasicBlock the IRSimilarityCandidate starts in.
+  BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
+  /// \returns The BasicBlock the IRSimilarityCandidate ends in.
+  BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
+
+  /// \returns The Function that the IRSimilarityCandidate is located in.
+  Function *getFunction() { return getStartBB()->getParent(); }
+
+  /// Finds the positive number associated with \p V if it has been mapped.
+  /// \param [in] V - the Value to find.
+  /// \returns The positive number corresponding to the value.
+  /// \returns None if not present.
+  Optional<unsigned> getGVN(Value *V) {
+    assert(V != nullptr && "Value is a nullptr?");
+    DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
+    if (VNIt == ValueToNumber.end())
+      return None;
+    return VNIt->second;
+  }
+
+  /// Finds the Value associate with \p Num if it exists.
+  /// \param [in] Num - the number to find.
+  /// \returns The Value associated with the number.
+  /// \returns None if not present.
+  Optional<Value *> fromGVN(unsigned Num) {
+    DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
+    if (VNIt == NumberToValue.end())
+      return None;
+    assert(VNIt->second != nullptr && "Found value is a nullptr!");
+    return VNIt->second;
+  }
+
+  /// \param RHS -The IRSimilarityCandidate to compare against
+  /// \returns true if the IRSimilarityCandidate is occurs after the
+  /// IRSimilarityCandidate in the program.
+  bool operator<(const IRSimilarityCandidate &RHS) const {
+    return getStartIdx() > RHS.getStartIdx();
+  }
+
+  using iterator = IRInstructionDataList::iterator;
+  iterator begin() const { return iterator(front()); }
+  iterator end() const { return std::next(iterator(back())); }
+};
 } // end namespace IRSimilarity
 } // end namespace llvm
 
Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp
===================================================================
--- llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -154,3 +154,91 @@
 
   return INumber;
 }
+
+IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+                                             IRInstructionData *FirstInstIt,
+                                             IRInstructionData *LastInstIt)
+    : StartIdx(StartIdx), Len(Len) {
+
+  assert(FirstInstIt != nullptr && "Instruction is nullptr!");
+  assert(LastInstIt != nullptr && "Instruction is nullptr!");
+  assert(StartIdx + Len > StartIdx &&
+         "Overflow for IRSimilarityCandidate range?");
+  assert(Len - 1 ==
+             std::distance(iterator(FirstInstIt), iterator(LastInstIt)) &&
+         "Length of the first and last IRInstructionData do not match the "
+         "given length");
+
+  // We iterate over the given instructions, and map each unique value
+  // to a unique number in the IRSimilarityCandidate ValueToNumber and
+  // NumberToValue maps.  A constant get its own value globally, the individual
+  // uses of the constants are not considered to be unique.
+  //
+  // IR:                    Mapping Added:
+  // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
+  // %add2 = add i32 %a, %1    %add2 -> 4
+  // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
+  //
+  // when replace with global values, starting from 1, would be
+  //
+  // 3 = add i32 1, 2
+  // 4 = add i32 1, 3
+  // 6 = add i32 5, 2
+  unsigned LocalValNumber = 1;
+  IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
+  for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
+    // Map the operand values to an unsigned integer if it does not already
+    // have an unsigned integer assigned to it.
+    for (Value *Arg : ID->OperVals)
+      if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
+        ValueToNumber.try_emplace(Arg, LocalValNumber);
+        NumberToValue.try_emplace(LocalValNumber, Arg);
+        LocalValNumber++;
+      }
+
+    // Mapping the instructions to an unsigned integer if it is not already
+    // exist in the mapping.
+    if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
+      ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
+      NumberToValue.try_emplace(LocalValNumber, ID->Inst);
+      LocalValNumber++;
+    }
+  }
+
+  // Setting the first and last instruction data pointers for the candidate.  If
+  // we got through the entire for loop without hitting an assert, we know
+  // that both of these instructions are not nullptrs.
+  FirstInst = FirstInstIt;
+  LastInst = LastInstIt;
+}
+
+bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
+                                      const IRSimilarityCandidate &B) {
+  if (A.getLength() != B.getLength())
+    return false;
+
+  auto InstrDataForBoth =
+      zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
+
+  return all_of(InstrDataForBoth,
+                [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
+                  IRInstructionData &A = std::get<0>(R);
+                  IRInstructionData &B = std::get<1>(R);
+                  if (!A.Legal || !B.Legal)
+                    return false;
+                  return isClose(A, B);
+                });
+}
+
+bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
+                                    const IRSimilarityCandidate &B) {
+  auto DoesOverlap = [](const IRSimilarityCandidate &X,
+                        const IRSimilarityCandidate &Y) {
+    // Check:
+    // XXXXXX        X starts before Y ends
+    //      YYYYYYY  Y starts after X starts
+    return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
+  };
+
+  return DoesOverlap(A, B) || DoesOverlap(B, A);
+}
Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
===================================================================
--- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -1174,3 +1174,205 @@
   // Make sure that the unsigned vector is the expected size.
   ASSERT_TRUE(UnsignedVec.size() == 6);
 }
+
+// A helper function that accepts an instruction list from a module made up of
+// two blocks of two legal instructions and terminator, and checks them for
+// instruction similarity.
+static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
+
+  std::vector<IRInstructionData *>::iterator Start, End;
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(End, 1);
+  IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(Start, 3);
+  std::advance(End, 4);
+  IRSimilarityCandidate Cand2(3, 2, *Start, *End);
+  return IRSimilarityCandidate::isSimilar(Cand1, Cand2);
+}
+
+// Checks that two adds with commuted operands are considered to be the same
+// instructions.
+TEST(IRSimilarityCandidate, CheckIdenticalInstructions) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(3));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  std::vector<IRInstructionData *>::iterator Start, End;
+  Start = InstrList.begin();
+  End = InstrList.begin();
+  std::advance(End, 1);
+  IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+  IRSimilarityCandidate Cand2(0, 2, *Start, *End);
+
+  ASSERT_TRUE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
+}
+
+// Checks that IRSimilarityCandidates wrapping these two regions of instructions
+// are able to differentiate between instructions that have different opcodes.
+TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             ret i32 0
+                          bb1:
+                             %2 = sub i32 %a, %b
+                             %3 = add i32 %b, %a
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList));
+}
+
+// Checks that IRSimilarityCandidates wrapping these two regions of instructions
+// are able to differentiate between instructions that have different types.
+TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             ret i32 0
+                          bb1:
+                             %2 = add i64 %c, %d
+                             %3 = add i64 %d, %c
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList));
+}
+
+// Check that debug instructions do not impact similarity. They are marked as
+// invisible.
+TEST(IRSimilarityCandidate, IdenticalWithDebug) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             call void @llvm.dbg.value(metadata !0)
+                             %1 = add i32 %b, %a
+                             ret i32 0
+                          bb1:
+                             %2 = add i32 %a, %b
+                             call void @llvm.dbg.value(metadata !1)
+                             %3 = add i32 %b, %a
+                             ret i32 0
+                          bb2:
+                             %4 = add i32 %a, %b
+                             %5 = add i32 %b, %a
+                             ret i32 0       
+                          }
+
+                          declare void @llvm.dbg.value(metadata)
+                          !0 = distinct !{!"test\00", i32 10}
+                          !1 = distinct !{!"test\00", i32 11})";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(9));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList));
+}
+
+// Checks that IRSimilarityCandidates that include illegal instructions, are not
+// considered to be the same set of instructions.  In these sets of instructions
+// the allocas are illegal.
+TEST(IRSimilarityCandidate, IllegalInCandidate) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %a, %b
+                             %2 = alloca i32
+                             ret i32 0
+                          bb1:
+                             %3 = add i32 %a, %b
+                             %4 = add i32 %a, %b
+                             %5 = alloca i32
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  getVectors(*M, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  std::vector<IRInstructionData *>::iterator Start, End;
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(End, 2);
+  IRSimilarityCandidate Cand1(0, 3, *Start, *End);
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(Start, 3);
+  std::advance(End, 5);
+  IRSimilarityCandidate Cand2(3, 3, *Start, *End);
+  ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
+}