Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -466,12 +466,31 @@ /// \param ValueNumberMappingB - The current mapping of global values /// numbering from \p B to \p A. /// \returns true if the IRSimilarityCandidates operands are compatible. - static bool compareOperandMapping( + static bool compareNonCommutativeOperandMapping( const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, ArrayRef &OperValsA, ArrayRef &OperValsB, DenseMap> &ValueNumberMappingA, DenseMap> &ValueNumberMappingB); + /// Compare the operands in \p A and \p B and check that the current mapping + /// of global value numbers from \p A to \p B and \p B to \A is consistent + /// given that the operands are commutative. + /// + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \param OperValsA - The operands in the instruction from \p A. + /// \param OperValsB - The operands in the instruction from \p B. + /// \param ValueNumberMappingA - The current mapping of global values + /// numbering from \p A to \p B. + /// \param ValueNumberMappingB - The current mapping of global values + /// numbering from \p B to \p A. + /// \returns true if the IRSimilarityCandidates operands are compatible. + static bool compareCommutativeOperandMapping( + const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, + ArrayRef &OperValsA, ArrayRef &OperValsB, + DenseMap> &ValueNumberMapping, + DenseMap> &OtherValueNumberMapping); + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Support/SuffixTree.h" @@ -240,6 +241,96 @@ }); } +/// Determine if one or more of the assigned global value numbers for the +/// operands in \p TargetValueNumbers is in the current mapping set for operand +/// numbers in \p SourceOperands. The set of possible corresponding global +/// value numbers are replaced with the most recent version of compatible +/// values. +/// +/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global +/// value number for the source IRInstructionCandidate. +/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source +/// IRSimilarityCandidate global value numbers to a set of possible numbers in +/// the target. +/// \param [in] SourceOperands - The operands in the original +/// IRSimilarityCandidate in the current instruction. +/// \param [in] TargetValueNumbers - The global value numbers of the operands in +/// the corresponding Instruction in the other IRSimilarityCandidate. +/// \returns true if there exists a possible mapping between the source +/// Instruction operands and the target Instruction operands, and false if not. +static bool checkNumberingAndReplaceCommutative( + const DenseMap &SourceValueToNumberMapping, + DenseMap> &CurrentSrcTgtNumberMapping, + ArrayRef &SourceOperands, + DenseSet &TargetValueNumbers){ + + DenseMap>::iterator ValueMappingIt; + + unsigned ArgVal; + bool WasInserted; + + // Iterate over the operands in the source IRSimilarityCandidate to determine + // whether there exists an operand in the other IRSimilarityCandidate that + // creates a valid mapping of Value to Value between the + // IRSimilarityCaniddates. + for (Value *V : SourceOperands) { + ArgVal = SourceValueToNumberMapping.find(V)->second; + + std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( + std::make_pair(ArgVal, TargetValueNumbers)); + + // Instead of finding a current mapping, we inserted a set. This means a + // mapping did not exist for the source Instruction operand, it has no + // current constraints we need to check. + if (WasInserted) + continue; + + // If a mapping already exists for the source operand to the values in the + // other IRSimilarityCandidate we need to iterate over the items in other + // IRSimilarityCandidate's Instruction to determine whether there is a valid + // mapping of Value to Value. + DenseSet NewSet; + for (unsigned &Curr : ValueMappingIt->second) + // If we can find the value in the mapping, we add it to the new set. + if (TargetValueNumbers.find(Curr) != TargetValueNumbers.end()) + NewSet.insert(Curr); + + // If we could not find a Value, return 0. + if (NewSet.empty()) + return false; + + // Otherwise replace the old mapping with the newly constructed one. + if (NewSet.size() != ValueMappingIt->second.size()) + ValueMappingIt->second.swap(NewSet); + + // We have reached no conclusions about the mapping, and cannot remove + // any items from the other operands, so we move to check the next operand. + if (ValueMappingIt->second.size() != 1) + continue; + + + unsigned ValToRemove = *ValueMappingIt->second.begin(); + // When there is only one item left in the mapping for and operand, remove + // the value from the other operands. If it results in there being no + // mapping, return false, it means the mapping is wrong + for (Value *InnerV : SourceOperands) { + if (V == InnerV) + continue; + + unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; + ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); + if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) + continue; + + ValueMappingIt->second.erase(ValToRemove); + if (ValueMappingIt->second.empty()) + return false; + } + } + + return true; +} + /// Determine if operand number \p TargetArgVal is in the current mapping set /// for operand number \p SourceArgVal. /// @@ -295,7 +386,7 @@ return TargetSet.find(TargetArgVal) != TargetSet.end(); } -bool IRSimilarityCandidate::compareOperandMapping( +bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, ArrayRef &OperValsA, ArrayRef &OperValsB, DenseMap> &ValueNumberMappingA, @@ -332,6 +423,43 @@ return true; } +bool IRSimilarityCandidate::compareCommutativeOperandMapping( + const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, + ArrayRef &OperValsA, ArrayRef &OperValsB, + DenseMap> &ValueNumberMapping, + DenseMap> &OtherValueNumberMapping) { + DenseSet ValueNumbers; + DenseSet OtherValueNumbers; + + ArrayRef::iterator VIt = OperValsA.begin(); + ArrayRef::iterator OVIt = OperValsB.begin(); + unsigned OperandLength = OperValsA.size(); + + // Find the value number sets for the operands. + for (unsigned Idx = 0; Idx < OperandLength; + Idx++, VIt++, OVIt++) { + ValueNumbers.insert(A.ValueToNumber.find(*VIt)->second); + OtherValueNumbers.insert(B.ValueToNumber.find(*OVIt)->second); + } + + // Iterate over the operands in the first IRSimilarityCandidate and make sure + // there exists a possible mapping with the operands in the second + // IRSimilarityCandidate. + if (!checkNumberingAndReplaceCommutative( + A.ValueToNumber, ValueNumberMapping, OperValsA, OtherValueNumbers)) + return false; + + // Iterate over the operands in the second IRSimilarityCandidate and make sure + // there exists a possible mapping with the operands in the first + // IRSimilarityCandidate. + if (!checkNumberingAndReplaceCommutative(B.ValueToNumber, + OtherValueNumberMapping, + OperValsB, ValueNumbers)) + return false; + + return true; +} + bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B) { if (A.getLength() != B.getLength()) @@ -387,10 +515,21 @@ ValueMappingIt->second.end()) return false; - // TODO: Handle commutative instructions by mapping one operand to many - // operands instead only mapping a single operand to a single operand. - if (!compareOperandMapping(A, B, OperVals, OtherOperVals, - ValueNumberMapping, OtherValueNumberMapping)) + // We have different paths for commutative instructions and non-commutative + // instructions since commutative instructions could allow multiple mappings + // to certain values. + if (I->isCommutative() && !isa(I)) { + if (!compareCommutativeOperandMapping(A, B, OperVals, OtherOperVals, + ValueNumberMapping, + OtherValueNumberMapping)) + return false; + continue; + } + + // Handle the non-commutative cases. + if(!compareNonCommutativeOperandMapping(A, B, OperVals, OtherOperVals, + ValueNumberMapping, + OtherValueNumberMapping)) return false; } return true; Index: llvm/test/Transforms/IROutliner/outlining-commutative-fp.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-commutative-fp.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that floating point commutative instructions are not treated +; as commutative. Even though an ffadd is technically commutative, the order +; of operands still needs to be enforced since the process of fadding floating +; point values requires the order to be the same. + +; We make sure that we outline the identical regions from the first two +; functions, but not the third. this is because the operands are in a different +; order in a floating point instruction in this section. + +define void @outline_from_fadd1() { +; CHECK-LABEL: @outline_from_fadd1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca double, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(double* [[A]], double* [[B]], double* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca double, align 4 + %b = alloca double, align 4 + %c = alloca double, align 4 + store double 2.0, double* %a, align 4 + store double 3.0, double* %b, align 4 + store double 4.0, double* %c, align 4 + %al = load double, double* %a + %bl = load double, double* %b + %cl = load double, double* %c + %0 = fadd double %al, %bl + %1 = fadd double %al, %cl + %2 = fadd double %bl, %cl + ret void +} + +define void @outline_from_fadd2.0() { +; CHECK-LABEL: @outline_from_fadd2.0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca double, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(double* [[A]], double* [[B]], double* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca double, align 4 + %b = alloca double, align 4 + %c = alloca double, align 4 + store double 2.0, double* %a, align 4 + store double 3.0, double* %b, align 4 + store double 4.0, double* %c, align 4 + %al = load double, double* %a + %bl = load double, double* %b + %cl = load double, double* %c + %0 = fadd double %al, %bl + %1 = fadd double %al, %cl + %2 = fadd double %bl, %cl + ret void +} + +define void @outline_from_flipped_fadd3.0() { +; CHECK-LABEL: @outline_from_flipped_fadd3.0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca double, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca double, align 4 +; CHECK-NEXT: store double 2.000000e+00, double* [[A]], align 4 +; CHECK-NEXT: store double 3.000000e+00, double* [[B]], align 4 +; CHECK-NEXT: store double 4.000000e+00, double* [[C]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load double, double* [[A]], align 8 +; CHECK-NEXT: [[BL:%.*]] = load double, double* [[B]], align 8 +; CHECK-NEXT: [[CL:%.*]] = load double, double* [[C]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[BL]], [[AL]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[CL]], [[AL]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[CL]], [[BL]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca double, align 4 + %b = alloca double, align 4 + %c = alloca double, align 4 + store double 2.0, double* %a, align 4 + store double 3.0, double* %b, align 4 + store double 4.0, double* %c, align 4 + %al = load double, double* %a + %bl = load double, double* %b + %cl = load double, double* %c + %0 = fadd double %bl, %al + %1 = fadd double %cl, %al + %2 = fadd double %cl, %bl + ret void +} + +; CHECK: define internal void @outlined_ir_func_0(double* [[ARG0:%.*]], double* [[ARG1:%.*]], double* [[ARG2:%.*]]) #0 { +; CHECK: entry_to_outline: +; CHECK-NEXT: store double 2.000000e+00, double* [[ARG0]], align 4 +; CHECK-NEXT: store double 3.000000e+00, double* [[ARG1]], align 4 +; CHECK-NEXT: store double 4.000000e+00, double* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load double, double* [[ARG0]], align 8 +; CHECK-NEXT: [[BL:%.*]] = load double, double* [[ARG1]], align 8 +; CHECK-NEXT: [[CL:%.*]] = load double, double* [[ARG2]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[AL]], [[BL]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[AL]], [[CL]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[BL]], [[CL]] + Index: llvm/test/Transforms/IROutliner/outlining-commutative.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-commutative.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that commutative instructions where the operands are +; swapped are outlined as the same function. + +; It also checks that non-commutative instructions outlined as different +; functions when the operands are swapped; + +; These are identical functions, except that in the flipped functions, +; the operands in the adds are commuted. However, since add instructions +; are commutative, we should still outline from all four as the same +; instruction. + +define void @outline_from_add1() { +; CHECK-LABEL: @outline_from_add1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 %al, %bl + %1 = add i32 %al, %cl + %2 = add i32 %bl, %cl + ret void +} + +define void @outline_from_add2() { +; CHECK-LABEL: @outline_from_add2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 %al, %bl + %1 = add i32 %al, %cl + %2 = add i32 %bl, %cl + ret void +} + +define void @outline_from_flipped_add3() { +; CHECK-LABEL: @outline_from_flipped_add3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 %bl, %al + %1 = add i32 %cl, %al + %2 = add i32 %cl, %bl + ret void +} + +define void @outline_from_flipped_add4() { +; CHECK-LABEL: @outline_from_flipped_add4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 %bl, %al + %1 = add i32 %cl, %al + %2 = add i32 %cl, %bl + ret void +} + +; These are identical functions, except that in the flipped functions, +; the operands in the subtractions are commuted. Since subtraction +; instructions are not commutative, we should outline the first two functions +; differently than the second two functions. + +define void @outline_from_sub1() { +; CHECK-LABEL: @outline_from_sub1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_2(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = sub i32 %al, %bl + %1 = sub i32 %al, %cl + %2 = sub i32 %bl, %cl + ret void +} + +define void @outline_from_sub2() { +; CHECK-LABEL: @outline_from_sub2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_2(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = sub i32 %al, %bl + %1 = sub i32 %al, %cl + %2 = sub i32 %bl, %cl + ret void +} + +define void @dontoutline_from_flipped_sub3() { +; CHECK-LABEL: @dontoutline_from_flipped_sub3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = sub i32 %bl, %al + %1 = sub i32 %cl, %al + %2 = sub i32 %cl, %bl + ret void +} + +define void @dontoutline_from_flipped_sub4() { +; CHECK-LABEL: @dontoutline_from_flipped_sub4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = sub i32 %bl, %al + %1 = sub i32 %cl, %al + %2 = sub i32 %cl, %bl + ret void +} + +; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 { +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[AL]], [[BL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[AL]], [[CL]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BL]], [[CL]] + +; CHECK: define internal void @outlined_ir_func_1(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 { +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[BL]], [[AL]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[CL]], [[AL]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[CL]], [[BL]] + +; CHECK: define internal void @outlined_ir_func_2(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) #0 { +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[AL]], [[BL]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[AL]], [[CL]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BL]], [[CL]] Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -1528,8 +1528,8 @@ ASSERT_TRUE(SimilarityCandidates.size() == 0); } -// This test checks to see whether we can detect similarity for commutativen -// instructions where the operands have been reversed. Right now, we cannot. +// This test checks to see whether we can detect similarity for commutative +// instructions where the operands have been reversed. TEST(IRSimilarityIdentifier, CommutativeSimilarity) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { @@ -1548,13 +1548,45 @@ std::vector> SimilarityCandidates; getSimilarities(*M, SimilarityCandidates); + ASSERT_TRUE(SimilarityCandidates.size() == 1); + for (std::vector &Cands : SimilarityCandidates) { + ASSERT_TRUE(Cands.size() == 2); + unsigned InstIdx = 0; + for (IRSimilarityCandidate &Cand : Cands) { + ASSERT_TRUE(Cand.getStartIdx() == InstIdx); + InstIdx += 3; + } + } +} + +// This test checks to see whether we can detect different structure in +// commutative instructions. In this case, the second operand in the second +// add is different. +TEST(IRSimilarityIdentifier, NoCommutativeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %1, %b + br label %bb1 + bb1: + %2 = add i32 %a, %b + %3 = add i32 %2, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector> SimilarityCandidates; + getSimilarities(*M, SimilarityCandidates); + ASSERT_TRUE(SimilarityCandidates.size() == 0); } -// Check that we are not finding commutative similarity in non commutative +// Check that we are not finding similarity in non commutative // instructions. That is, while the instruction and operands used are the same -// in the two subtraction sequences, they cannot be counted as the same since -// a subtraction is not commutative. +// in the two subtraction sequences, they are in a different order, and cannot +// be counted as the same since a subtraction is not commutative. TEST(IRSimilarityIdentifier, NonCommutativeDifference) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) {