Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
===================================================================
--- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -466,12 +466,31 @@
   /// \param ValueNumberMappingB - The current mapping of global values
   /// numbering from \p B to \p A.
   /// \returns true if the IRSimilarityCandidates operands are compatible.
-  static bool compareOperandMapping(
+  static bool compareNonCommutativeOperandMapping(
       const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
       ArrayRef<Value *> &OperValsA, ArrayRef<Value *> &OperValsB,
       DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
       DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
 
+  /// Compare the operands in \p A and \p B and check that the current mapping
+  /// of global value numbers from \p A to \p B and \p B to \A is consistent
+  /// given that the operands are commutative.
+  ///
+  /// \param A - The first IRInstructionCandidate to compare.
+  /// \param B - The second IRInstructionCandidate to compare.
+  /// \param OperValsA - The operands in the instruction from \p A.
+  /// \param OperValsB - The operands in the instruction from \p B.
+  /// \param ValueNumberMappingA - The current mapping of global values
+  /// numbering from \p A to \p B.
+  /// \param ValueNumberMappingB - The current mapping of global values
+  /// numbering from \p B to \p A.
+  /// \returns true if the IRSimilarityCandidates operands are compatible.
+  static bool compareCommutativeOperandMapping(
+      const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
+      ArrayRef<Value *> &OperValsA, ArrayRef<Value *> &OperValsB,
+      DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping,
+      DenseMap<unsigned, DenseSet<unsigned>> &OtherValueNumberMapping);
+
   /// Compare the start and end indices of the two IRSimilarityCandidates for
   /// whether they overlap. If the start instruction of one
   /// IRSimilarityCandidate is less than the end instruction of the other, and
Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp
===================================================================
--- llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/IRSimilarityIdentifier.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/User.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/SuffixTree.h"
@@ -240,6 +241,96 @@
                 });
 }
 
+/// Determine if one or more of the assigned global value numbers for the
+/// operands in \p TargetValueNumbers is in the current mapping set for operand
+/// numbers in \p SourceOperands.  The set of possible corresponding global
+/// value numbers are replaced with the most recent version of compatible
+/// values.
+///
+/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
+/// value number for the source IRInstructionCandidate.
+/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
+/// IRSimilarityCandidate global value numbers to a set of possible numbers in
+/// the target.
+/// \param [in] SourceOperands - The operands in the original
+/// IRSimilarityCandidate in the current instruction.
+/// \param [in] TargetValueNumbers - The global value numbers of the operands in
+/// the corresponding Instruction in the other IRSimilarityCandidate.
+/// \returns true if there exists a possible mapping between the source
+/// Instruction operands and the target Instruction operands, and false if not.
+static bool checkNumberingAndReplaceCommutative(
+  const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
+  DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
+  ArrayRef<Value *> &SourceOperands,
+  DenseSet<unsigned> &TargetValueNumbers){
+
+  DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
+
+  unsigned ArgVal;
+  bool WasInserted;
+
+  // Iterate over the operands in the source IRSimilarityCandidate to determine
+  // whether there exists an operand in the other IRSimilarityCandidate that
+  // creates a valid mapping of Value to Value between the
+  // IRSimilarityCaniddates.
+  for (Value *V : SourceOperands) {
+    ArgVal = SourceValueToNumberMapping.find(V)->second;
+
+    std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
+        std::make_pair(ArgVal, TargetValueNumbers));
+
+    // Instead of finding a current mapping, we inserted a set.  This means a
+    // mapping did not exist for the source Instruction operand, it has no
+    // current constraints we need to check.
+    if (WasInserted)
+      continue;
+
+    // If a mapping already exists for the source operand to the values in the
+    // other IRSimilarityCandidate we need to iterate over the items in other
+    // IRSimilarityCandidate's Instruction to determine whether there is a valid
+    // mapping of Value to Value.
+    DenseSet<unsigned> NewSet;
+    for (unsigned &Curr : ValueMappingIt->second)
+      // If we can find the value in the mapping, we add it to the new set.
+      if (TargetValueNumbers.find(Curr) != TargetValueNumbers.end())
+        NewSet.insert(Curr);
+
+    // If we could not find a Value, return 0.
+    if (NewSet.empty())
+      return false;
+    
+    // Otherwise replace the old mapping with the newly constructed one.
+    if (NewSet.size() != ValueMappingIt->second.size())
+      ValueMappingIt->second.swap(NewSet);
+
+    // We have reached no conclusions about the mapping, and cannot remove
+    // any items from the other operands, so we move to check the next operand.
+    if (ValueMappingIt->second.size() != 1)
+      continue;
+
+
+    unsigned ValToRemove = *ValueMappingIt->second.begin();
+    // When there is only one item left in the mapping for and operand, remove
+    // the value from the other operands.  If it results in there being no
+    // mapping, return false, it means the mapping is wrong
+    for (Value *InnerV : SourceOperands) {
+      if (V == InnerV)
+        continue;
+
+      unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
+      ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
+      if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
+        continue;
+
+      ValueMappingIt->second.erase(ValToRemove);
+      if (ValueMappingIt->second.empty())
+        return false;
+    }
+  }
+
+  return true;
+}
+
 /// Determine if operand number \p TargetArgVal is in the current mapping set
 /// for operand number \p SourceArgVal.
 ///
@@ -295,7 +386,7 @@
   return TargetSet.find(TargetArgVal) != TargetSet.end();
 }
 
-bool IRSimilarityCandidate::compareOperandMapping(
+bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
     ArrayRef<Value *> &OperValsA, ArrayRef<Value *> &OperValsB,
     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
@@ -332,6 +423,43 @@
   return true;
 }
 
+bool IRSimilarityCandidate::compareCommutativeOperandMapping(
+    const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
+    ArrayRef<Value *> &OperValsA, ArrayRef<Value *> &OperValsB,
+    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping,
+    DenseMap<unsigned, DenseSet<unsigned>> &OtherValueNumberMapping) {
+  DenseSet<unsigned> ValueNumbers;      
+  DenseSet<unsigned> OtherValueNumbers;
+
+  ArrayRef<Value *>::iterator VIt = OperValsA.begin();
+  ArrayRef<Value *>::iterator OVIt = OperValsB.begin();
+  unsigned OperandLength = OperValsA.size();
+
+  // Find the value number sets for the operands.
+  for (unsigned Idx = 0; Idx < OperandLength;
+       Idx++, VIt++, OVIt++) {
+    ValueNumbers.insert(A.ValueToNumber.find(*VIt)->second);
+    OtherValueNumbers.insert(B.ValueToNumber.find(*OVIt)->second);
+  }
+
+  // Iterate over the operands in the first IRSimilarityCandidate and make sure
+  // there exists a possible mapping with the operands in the second
+  // IRSimilarityCandidate.
+  if (!checkNumberingAndReplaceCommutative(
+          A.ValueToNumber, ValueNumberMapping, OperValsA, OtherValueNumbers))
+    return false;
+
+  // Iterate over the operands in the second IRSimilarityCandidate and make sure
+  // there exists a possible mapping with the operands in the first
+  // IRSimilarityCandidate.
+  if (!checkNumberingAndReplaceCommutative(B.ValueToNumber,
+                                          OtherValueNumberMapping,
+                                          OperValsB, ValueNumbers))
+    return false;
+
+  return true;
+}
+
 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
                                              const IRSimilarityCandidate &B) {
   if (A.getLength() != B.getLength())
@@ -387,10 +515,21 @@
                             ValueMappingIt->second.end())
       return false;
 
-    // TODO: Handle commutative instructions by mapping one operand to many
-    // operands instead only mapping a single operand to a single operand.
-    if (!compareOperandMapping(A, B, OperVals, OtherOperVals,
-                               ValueNumberMapping, OtherValueNumberMapping))
+    // We have different paths for commutative instructions and non-commutative
+    // instructions since commutative instructions could allow multiple mappings
+    // to certain values.
+    if (I->isCommutative() && !isa<FPMathOperator>(I)) {
+      if (!compareCommutativeOperandMapping(A, B, OperVals, OtherOperVals,
+                                     ValueNumberMapping,
+                                     OtherValueNumberMapping))
+        return false;
+      continue;
+    }
+
+    // Handle the non-commutative cases.
+    if(!compareNonCommutativeOperandMapping(A, B, OperVals, OtherOperVals,
+                                     ValueNumberMapping,
+                                     OtherValueNumberMapping))
       return false;
   }
   return true;
Index: llvm/test/Transforms/IROutliner/outlining-commutative-fp.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/IROutliner/outlining-commutative-fp.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test checks that floating point commutative instructions are not treated
+; as commutative.  Even though an ffadd is technically commutative, the order
+; of operands still needs to be enforced since the process of fadding floating
+; point values requires the order to be the same.
+
+; We make sure that we outline the identical regions from the first two
+; functions, but not the third.  this is because the operands are in a different
+; order in a floating point instruction in this section.
+
+define void @outline_from_fadd1() {
+; CHECK-LABEL: @outline_from_fadd1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca double, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(double* [[A]], double* [[B]], double* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca double, align 4
+  %b = alloca double, align 4
+  %c = alloca double, align 4
+  store double 2.0, double* %a, align 4
+  store double 3.0, double* %b, align 4
+  store double 4.0, double* %c, align 4
+  %al = load double, double* %a
+  %bl = load double, double* %b
+  %cl = load double, double* %c
+  %0 = fadd double %al, %bl
+  %1 = fadd double %al, %cl
+  %2 = fadd double %bl, %cl
+  ret void
+}
+
+define void @outline_from_fadd2.0() {
+; CHECK-LABEL: @outline_from_fadd2.0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca double, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(double* [[A]], double* [[B]], double* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca double, align 4
+  %b = alloca double, align 4
+  %c = alloca double, align 4
+  store double 2.0, double* %a, align 4
+  store double 3.0, double* %b, align 4
+  store double 4.0, double* %c, align 4
+  %al = load double, double* %a
+  %bl = load double, double* %b
+  %cl = load double, double* %c
+  %0 = fadd double %al, %bl
+  %1 = fadd double %al, %cl
+  %2 = fadd double %bl, %cl
+  ret void
+}
+
+define void @outline_from_flipped_fadd3.0() {
+; CHECK-LABEL: @outline_from_flipped_fadd3.0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca double, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca double, align 4
+; CHECK-NEXT:    store double 2.000000e+00, double* [[A]], align 4
+; CHECK-NEXT:    store double 3.000000e+00, double* [[B]], align 4
+; CHECK-NEXT:    store double 4.000000e+00, double* [[C]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load double, double* [[A]], align 8
+; CHECK-NEXT:    [[BL:%.*]] = load double, double* [[B]], align 8
+; CHECK-NEXT:    [[CL:%.*]] = load double, double* [[C]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd double [[BL]], [[AL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd double [[CL]], [[AL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd double [[CL]], [[BL]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca double, align 4
+  %b = alloca double, align 4
+  %c = alloca double, align 4
+  store double 2.0, double* %a, align 4
+  store double 3.0, double* %b, align 4
+  store double 4.0, double* %c, align 4
+  %al = load double, double* %a
+  %bl = load double, double* %b
+  %cl = load double, double* %c
+  %0 = fadd double %bl, %al
+  %1 = fadd double %cl, %al
+  %2 = fadd double %cl, %bl
+  ret void
+}
+
+; CHECK: define internal void @outlined_ir_func_0(double* [[ARG0:%.*]], double* [[ARG1:%.*]], double* [[ARG2:%.*]]) #0 {
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store double 2.000000e+00, double* [[ARG0]], align 4
+; CHECK-NEXT:    store double 3.000000e+00, double* [[ARG1]], align 4
+; CHECK-NEXT:    store double 4.000000e+00, double* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load double, double* [[ARG0]], align 8
+; CHECK-NEXT:    [[BL:%.*]] = load double, double* [[ARG1]], align 8
+; CHECK-NEXT:    [[CL:%.*]] = load double, double* [[ARG2]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd double [[AL]], [[BL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd double [[AL]], [[CL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd double [[BL]], [[CL]]
+
Index: llvm/test/Transforms/IROutliner/outlining-commutative.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/IROutliner/outlining-commutative.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+
+; This test checks that commutative instructions where the operands are
+; swapped are outlined as the same function.
+
+; It also checks that non-commutative instructions outlined as different
+; functions when the operands are swapped;
+
+; These are identical functions, except that in the flipped functions,
+; the operands in the adds are commuted.  However, since add instructions
+; are commutative, we should still outline from all four as the same
+; instruction.
+
+define void @outline_from_add1() {
+; CHECK-LABEL: @outline_from_add1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = add i32 %al, %bl
+  %1 = add i32 %al, %cl
+  %2 = add i32 %bl, %cl
+  ret void
+}
+
+define void @outline_from_add2() {
+; CHECK-LABEL: @outline_from_add2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = add i32 %al, %bl
+  %1 = add i32 %al, %cl
+  %2 = add i32 %bl, %cl
+  ret void
+}
+
+define void @outline_from_flipped_add3() {
+; CHECK-LABEL: @outline_from_flipped_add3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = add i32 %bl, %al
+  %1 = add i32 %cl, %al
+  %2 = add i32 %cl, %bl
+  ret void
+}
+
+define void @outline_from_flipped_add4() {
+; CHECK-LABEL: @outline_from_flipped_add4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = add i32 %bl, %al
+  %1 = add i32 %cl, %al
+  %2 = add i32 %cl, %bl
+  ret void
+}
+
+; These are identical functions, except that in the flipped functions,
+; the operands in the subtractions are commuted.  Since subtraction
+; instructions are not commutative, we should outline the first two functions
+; differently than the second two functions.
+
+define void @outline_from_sub1() {
+; CHECK-LABEL: @outline_from_sub1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_2(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = sub i32 %al, %bl
+  %1 = sub i32 %al, %cl
+  %2 = sub i32 %bl, %cl
+  ret void
+}
+
+define void @outline_from_sub2() {
+; CHECK-LABEL: @outline_from_sub2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_2(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = sub i32 %al, %bl
+  %1 = sub i32 %al, %cl
+  %2 = sub i32 %bl, %cl
+  ret void
+}
+
+define void @dontoutline_from_flipped_sub3() {
+; CHECK-LABEL: @dontoutline_from_flipped_sub3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = sub i32 %bl, %al
+  %1 = sub i32 %cl, %al
+  %2 = sub i32 %cl, %bl
+  ret void
+}
+
+define void @dontoutline_from_flipped_sub4() {
+; CHECK-LABEL: @dontoutline_from_flipped_sub4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  %0 = sub i32 %bl, %al
+  %1 = sub i32 %cl, %al
+  %2 = sub i32 %cl, %bl
+  ret void
+}
+
+; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 {
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[AL]], [[BL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[AL]], [[CL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BL]], [[CL]]
+
+; CHECK: define internal void @outlined_ir_func_1(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 {
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[BL]], [[AL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[CL]], [[AL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[CL]], [[BL]]
+
+; CHECK: define internal void @outlined_ir_func_2(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 {
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[AL]], [[BL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[AL]], [[CL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BL]], [[CL]]
Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
===================================================================
--- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -1528,8 +1528,8 @@
   ASSERT_TRUE(SimilarityCandidates.size() == 0);
 }
 
-// This test checks to see whether we can detect similarity for commutativen
-// instructions where the operands have been reversed.  Right now, we cannot.
+// This test checks to see whether we can detect similarity for commutative
+// instructions where the operands have been reversed.
 TEST(IRSimilarityIdentifier, CommutativeSimilarity) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
@@ -1548,13 +1548,45 @@
   std::vector<std::vector<IRSimilarityCandidate>> SimilarityCandidates;
   getSimilarities(*M, SimilarityCandidates);
 
+  ASSERT_TRUE(SimilarityCandidates.size() == 1);
+  for (std::vector<IRSimilarityCandidate> &Cands : SimilarityCandidates) {
+    ASSERT_TRUE(Cands.size() == 2);
+    unsigned InstIdx = 0;
+    for (IRSimilarityCandidate &Cand : Cands) {
+      ASSERT_TRUE(Cand.getStartIdx() == InstIdx);
+      InstIdx += 3;
+    }
+  }
+}
+
+// This test checks to see whether we can detect different structure in
+// commutative instructions.  In this case, the second operand in the second
+// add is different.
+TEST(IRSimilarityIdentifier, NoCommutativeSimilarity) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %1, %b
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %a, %b
+                             %3 = add i32 %2, %a
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<std::vector<IRSimilarityCandidate>> SimilarityCandidates;
+  getSimilarities(*M, SimilarityCandidates);
+
   ASSERT_TRUE(SimilarityCandidates.size() == 0);
 }
 
-// Check that we are not finding commutative similarity in non commutative
+// Check that we are not finding similarity in non commutative
 // instructions.  That is, while the instruction and operands used are the same
-// in the two subtraction sequences, they cannot be counted as the same since
-// a subtraction is not commutative.
+// in the two subtraction sequences, they are in a different order, and cannot
+// be counted as the same since a subtraction is not commutative.
 TEST(IRSimilarityIdentifier, NonCommutativeDifference) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {