Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -127,6 +127,8 @@ /// to a less than form. It is None otherwise. Optional RevisedPredicate; + SmallVector RelativeBlockLocations; + /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make @@ -145,6 +147,16 @@ /// \return the consistent comparison predicate. static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + /// A function that for an IRInstructionData containing a branch, finds the + /// relative distances from the source basic block to the target by taking + /// the difference of the number assigned to the current basic block and the + /// target basic block of the branch. + /// + /// \param BasicBlockToInteger - The mapping of basic blocks to their location + /// in the module. + void + setBranchSuccessors(DenseMap &BasicBlockToInteger); + /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. @@ -288,6 +300,10 @@ DenseMap InstructionIntegerMap; + /// A mapping for a basic block in a module to it's assigned number/location + /// in the module. + DenseMap BasicBlockToInteger; + /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more @@ -329,6 +345,24 @@ IRInstructionDataList *IDL = nullptr; + /// Assigns values to all the basic blocks in function \p F starting from + /// integer \p BBNumber. + /// + /// \param F - The function containing the basic blocks to assign numbers to. + /// \param BBNumber - The number to start from. + void initializeForBBs(Function &F, unsigned &BBNumber) { + for (BasicBlock &BB : F) + BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); + } + + /// Assigns values to all the basic blocks in Module \p M. + /// \param M - The modules containing the basic blocks to assign numbers to. + void initializeForBBs(Module &M) { + unsigned BBNumber = 0; + for (Function &F : M) + initializeForBBs(F, BBNumber); + } + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. @@ -386,7 +420,11 @@ InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. - InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + InstrType visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return Legal; + return Illegal; + } // TODO: Determine a scheme to resolve when the labels are similar enough. InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. @@ -419,6 +457,8 @@ // TODO: Handle interblock similarity. InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } + + bool EnableBranches = false; }; /// Maps an Instruction to a member of InstrType. @@ -595,6 +635,31 @@ DenseMap> &ToAMapping, DenseMap> &FromAMapping); + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + } + } + + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \param [in,out] BBList - A list in order of use to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet, + std::vector &BBList) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + BBList.push_back(BB); + } + } + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -714,8 +779,9 @@ /// analyzing the module. class IRSimilarityIdentifier { public: - IRSimilarityIdentifier() - : Mapper(&InstDataAllocator, &InstDataListAllocator) {} + IRSimilarityIdentifier(bool MatchBranches = true) + : Mapper(&InstDataAllocator, &InstDataListAllocator), + EnableBranches(MatchBranches) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -791,6 +857,8 @@ /// instance of IRInstructionData. IRInstructionMapper Mapper; + bool EnableBranches = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -23,6 +23,10 @@ using namespace llvm; using namespace IRSimilarity; +cl::opt DoNotEnableBranches( + "no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, + cl::desc("disable similarity matching, and outlining, across branches.")); + IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) : Inst(&I), Legal(Legality), IDL(&IDList) { @@ -49,6 +53,30 @@ } } +void IRInstructionData::setBranchSuccessors( + DenseMap &BasicBlockToInteger) { + assert(isa(Inst) && "Instruction must be branch"); + + BranchInst *BI = cast(Inst); + DenseMap::iterator BBNumIt; + + BBNumIt = BasicBlockToInteger.find(BI->getParent()); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find location for BasicBlock!"); + + int CurrentBlockNumber = static_cast(BBNumIt->second); + + for (BasicBlock *Successor : BI->successors()) { + BBNumIt = BasicBlockToInteger.find(Successor); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find number for BasicBlock!"); + int OtherBlockNumber = static_cast(BBNumIt->second); + + int Relative = OtherBlockNumber - CurrentBlockNumber; + RelativeBlockLocations.push_back(Relative); + } +} + CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { switch (CI->getPredicate()) { case CmpInst::FCMP_OGT: @@ -143,6 +171,11 @@ return false; } + if (isa(A.Inst) && isa(B.Inst)) { + if (A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) + return false; + } + return true; } @@ -156,10 +189,6 @@ std::vector IntegerMappingForBB; std::vector InstrListForBB; - HaveLegalRange = false; - CanCombineWithPrevInstr = false; - AddedIllegalLastTime = true; - for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { switch (InstClassifier.visit(*It)) { case InstrType::Legal: @@ -175,7 +204,8 @@ } if (HaveLegalRange) { - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + if (AddedIllegalLastTime) + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); for (IRInstructionData *ID : InstrListForBB) this->IDL->push_back(*ID); llvm::append_range(InstrList, InstrListForBB); @@ -203,6 +233,9 @@ IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); InstrListForBB.push_back(ID); + if (isa(*It)) + ID->setBranchSuccessors(BasicBlockToInteger); + // Add to the instruction list bool WasInserted; DenseMap::iterator @@ -640,6 +673,55 @@ {A, OperValsA, ValueNumberMappingA}, {B, OperValsB, ValueNumberMappingB})) return false; + + // Here we check that between two corresponding branch instructions, + // when the branch targets a basic block within the same region, the + // relative locations are the same, and otherwise, are targeting basic + // blocks outside the region at the same locations. + + // We are able to make the assumption about blocks outside of the region + // since the target block labels are considered values and will follow the + // same number matching that we defined for the other instructions in the + // region. So, at this point, in each location we target a specific block + // outside the region, we are targeting a corresponding block in each + // analagous location in the region we are comparing to. + if ((isa(ItA->Inst) && isa(ItB->Inst)) || + (isa(ItA->Inst) && isa(ItB->Inst))) { + if (ItA->RelativeBlockLocations.size() != + ItB->RelativeBlockLocations.size()) + return false; + + if (OperValsA.size() != OperValsB.size()) + return false; + + detail::zippy &, + SmallVector &, ArrayRef &, + ArrayRef &> + ZippedRelativeLocations = + zip(ItA->RelativeBlockLocations, ItB->RelativeBlockLocations, + OperValsA, OperValsB); + if (!all_of(ZippedRelativeLocations, + [&A, &B](std::tuple R) { + BasicBlock *ABB = static_cast(std::get<2>(R)); + BasicBlock *BBB = static_cast(std::get<3>(R)); + bool AContained = false; + bool BContained = false; + DenseSet BasicBlockA; + DenseSet BasicBlockB; + A.getBasicBlocks(BasicBlockA); + B.getBasicBlocks(BasicBlockB); + if (BasicBlockA.find(ABB) != BasicBlockA.end()) + AContained = true; + if (BasicBlockB.find(BBB) != BasicBlockB.end()) + BContained = true; + if (AContained != BContained) + return false; + if (AContained) + return std::get<0>(R) == std::get<1>(R); + return true; + })) + return false; + } } return true; } @@ -665,6 +747,8 @@ std::vector IntegerMappingForModule; // Iterate over the functions in the module to map each Instruction in each // BasicBlock to an unsigned integer. + Mapper.initializeForBBs(M); + for (Function &F : M) { if (F.empty()) @@ -672,15 +756,16 @@ for (BasicBlock &BB : F) { - if (BB.sizeWithoutDebug() < 2) - continue; - // BB has potential to have similarity since it has a size greater than 2 // and can therefore match other regions greater than 2. Map it to a list // of unsigned integers. Mapper.convertToUnsignedVec(BB, InstrListForModule, IntegerMappingForModule); } + + BasicBlock::iterator It = F.begin()->end(); + Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, + true); } // Insert the InstrListForModule at the end of the overall InstrList so that @@ -715,6 +800,16 @@ std::vector &CandsForRepSubstring) { unsigned StringLen = RS.Length; + if (StringLen < 2) + return; + + if (StringLen == 2) { + const unsigned StartIdx = *RS.StartIndices.begin(); + if (isa(InstrList[StartIdx]->Inst) && + isa(InstrList[StartIdx + 1]->Inst)) { + return; + } + } // Create an IRSimilarityCandidate for instance of this subsequence \p RS. for (const unsigned &StartIdx : RS.StartIndices) { @@ -951,6 +1046,7 @@ std::vector InstrList; std::vector IntegerMapping; + Mapper.InstClassifier.EnableBranches = this->EnableBranches; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -960,6 +1056,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { resetSimilarityCandidates(); + Mapper.InstClassifier.EnableBranches = this->EnableBranches; std::vector InstrList; std::vector IntegerMapping; @@ -980,7 +1077,7 @@ } bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { - IRSI.reset(new IRSimilarityIdentifier()); + IRSI.reset(new IRSimilarityIdentifier(!DoNotEnableBranches)); return false; } @@ -996,9 +1093,9 @@ AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, - ModuleAnalysisManager &) { + ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(); + auto IRSI = IRSimilarityIdentifier(!DoNotEnableBranches); IRSI.findSimilarity(M); return IRSI; } Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -41,7 +41,7 @@ void getSimilarities( Module &M, std::vector> &SimilarityCandidates) { - IRSimilarityIdentifier Identifier; + IRSimilarityIdentifier Identifier(false); SimilarityCandidates = Identifier.findSimilarity(M); } @@ -726,22 +726,8 @@ ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); } -// In most cases, the illegal instructions we are collecting don't require any -// sort of setup. In these cases, we can just only have illegal instructions, -// and the mapper will create 0 length vectors, and we can check that. - -// In cases where we have legal instructions needed to set up the illegal -// instruction, to check illegal instructions are assigned unsigned integers -// from the maximum value decreasing to 0, it will be greater than a legal -// instruction that comes after. So to check that we have an illegal -// instruction, we place a legal instruction after an illegal instruction, and -// check that the illegal unsigned integer is greater than the unsigned integer -// of the legal instruction. - -// Checks that the branch is mapped to be illegal since there is extra checking -// needed to ensure that a branch in one region is branching to an isomorphic -// location in a different region. -TEST(IRInstructionMapper, BranchIllegal) { +// Checks that the branch is mapped to legal when the option is set. +TEST(IRInstructionMapper, BranchLegal) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { bb0: @@ -759,12 +745,28 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(0)); + ASSERT_EQ(UnsignedVec.size(), static_cast(3)); + ASSERT_TRUE(UnsignedVec[1] > UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]); } +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. + +// In cases where we have legal instructions needed to set up the illegal +// instruction, to check illegal instructions are assigned unsigned integers +// from the maximum value decreasing to 0, it will be greater than a legal +// instruction that comes after. So to check that we have an illegal +// instruction, we place a legal instruction after an illegal instruction, and +// check that the illegal unsigned integer is greater than the unsigned integer +// of the legal instruction. + // Checks that a PHINode is mapped to be illegal since there is extra checking // needed to ensure that a branch in one region is bin an isomorphic // location in a different region. @@ -1445,8 +1447,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a memcpy instruction is mapped to an illegal value. @@ -1476,8 +1478,10 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + errs() << UnsignedVec[1] << " " << UnsignedVec[2] << " " << UnsignedVec[3] << "\n"; + ASSERT_TRUE(UnsignedVec[2] > UnsignedVec[3]); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a memmove instruction is mapped to an illegal value. @@ -1507,8 +1511,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a variable argument instructions are mapped to an illegal value. @@ -1555,11 +1559,10 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(16)); - ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); - ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); - ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); - ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); + ASSERT_EQ(UnsignedVec.size(), static_cast(17)); + ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[10]); + ASSERT_TRUE(UnsignedVec[16] < UnsignedVec[13]); } // Check the length of adding two illegal instructions one after th other. We @@ -1599,21 +1602,24 @@ // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. static bool longSimCandCompare(std::vector &InstrList, - bool Structure = false) { + bool Structure = false, + unsigned Length=2, + unsigned StartIdxOne=0, + unsigned StartIdxTwo=3) { std::vector::iterator Start, End; Start = InstrList.begin(); End = InstrList.begin(); - std::advance(End, 1); - IRSimilarityCandidate Cand1(0, 2, *Start, *End); + std::advance(End, StartIdxOne + Length - 1); + IRSimilarityCandidate Cand1(StartIdxOne, Length, *Start, *End); Start = InstrList.begin(); End = InstrList.begin(); - std::advance(Start, 3); - std::advance(End, 4); - IRSimilarityCandidate Cand2(3, 2, *Start, *End); + std::advance(Start, StartIdxTwo); + std::advance(End, StartIdxTwo + Length - 1); + IRSimilarityCandidate Cand2(StartIdxTwo, Length, *Start, *End); if (Structure) return IRSimilarityCandidate::compareStructure(Cand1, Cand2); return IRSimilarityCandidate::isSimilar(Cand1, Cand2); @@ -2083,6 +2089,206 @@ ASSERT_TRUE(longSimCandCompare(InstrList, true)); } +// Checks that the same structure is recognized between two candidates when +// the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, SameBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 5, 0, 6)); +} + +// Checks that the different structure is recognized between two candidates, +// when the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(18)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList, true, 6, 0, 9)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, SameBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(15)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + // Checks that two sets of identical instructions are found to be the same. // Both sequences of adds have the same operand ordering, and the same // instructions, making them strcturally equivalent.