Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -127,6 +127,8 @@ /// to a less than form. It is None otherwise. Optional RevisedPredicate; + SmallVector RelativeBlockLocations; + /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make @@ -145,6 +147,9 @@ /// \return the consistent comparison predicate. static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + void + setBranchSuccessors(DenseMap &BasicBlockToInteger); + /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. @@ -284,10 +289,14 @@ /// The next available integer to assign to a legal Instruction to. unsigned LegalInstrNumber = 0; + unsigned BBNumber = 0; + /// Correspondence from IRInstructionData to unsigned integers. DenseMap InstructionIntegerMap; + DenseMap BasicBlockToInteger; + /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more @@ -329,6 +338,16 @@ IRInstructionDataList *IDL = nullptr; + void initializeForBBs(Function &F) { + for (BasicBlock &BB : F) + BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); + } + + void initializeForBBs(Module &M) { + for (Function &F : M) + initializeForBBs(F); + } + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. @@ -386,7 +405,11 @@ InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. - InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + InstrType visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return Legal; + return Illegal; + } // TODO: Determine a scheme to resolve when the labels are similar enough. InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. @@ -419,6 +442,8 @@ // TODO: Handle interblock similarity. InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } + + bool EnableBranches = false; }; /// Maps an Instruction to a member of InstrType. @@ -596,6 +621,31 @@ DenseMap> &ToAMapping, DenseMap> &FromAMapping); + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + } + } + + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \param [in,out] BBList - A list in order of use to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet, + std::vector &BBList) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + BBList.push_back(BB); + } + } + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -23,6 +23,10 @@ using namespace llvm; using namespace IRSimilarity; +cl::opt EnableBranches( + "ir-sim-branches", cl::init(false), cl::ReallyHidden, + cl::desc("enable similarity mactching across branches.")); + IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) : Inst(&I), Legal(Legality), IDL(&IDList) { @@ -49,6 +53,30 @@ } } +void IRInstructionData::setBranchSuccessors( + DenseMap &BasicBlockToInteger) { + assert(isa(Inst) && "Instruction must be branch"); + + BranchInst *BI = cast(Inst); + DenseMap::iterator BBNumIt; + + BBNumIt = BasicBlockToInteger.find(BI->getParent()); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find location for BasicBlock!"); + + int CurrentBlockNumber = static_cast(BBNumIt->second); + + for (BasicBlock *Successor : BI->successors()) { + BBNumIt = BasicBlockToInteger.find(Successor); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find number for BasicBlock!"); + int OtherBlockNumber = static_cast(BBNumIt->second); + + int Relative = OtherBlockNumber - CurrentBlockNumber; + RelativeBlockLocations.push_back(Relative); + } +} + CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { switch (CI->getPredicate()) { case CmpInst::FCMP_OGT: @@ -143,6 +171,11 @@ return false; } + if (isa(A.Inst) && isa(B.Inst)) { + if (A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) + return false; + } + return true; } @@ -156,10 +189,6 @@ std::vector IntegerMappingForBB; std::vector InstrListForBB; - HaveLegalRange = false; - CanCombineWithPrevInstr = false; - AddedIllegalLastTime = true; - for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { switch (InstClassifier.visit(*It)) { case InstrType::Legal: @@ -175,6 +204,7 @@ } if (HaveLegalRange) { + if (AddedIllegalLastTime) mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); for (IRInstructionData *ID : InstrListForBB) this->IDL->push_back(*ID); @@ -203,6 +233,9 @@ IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); InstrListForBB.push_back(ID); + if (isa(*It)) + ID->setBranchSuccessors(BasicBlockToInteger); + // Add to the instruction list bool WasInserted; DenseMap::iterator @@ -640,6 +673,48 @@ {A, OperValsA, ValueNumberMappingA}, {B, OperValsB, ValueNumberMappingB})) return false; + + // Here we check that between two corresponding branch instructions, + // when the branch targets a basic block within the same region, the + // relative locations are the same, and otherwise, are targeting basic + // blocks outside the region at the same locations. + if ((isa(ItA->Inst) && isa(ItB->Inst)) || + (isa(ItA->Inst) && isa(ItB->Inst))) { + if (ItA->RelativeBlockLocations.size() != + ItB->RelativeBlockLocations.size()) + return false; + + if (OperValsA.size() != OperValsB.size()) + return false; + + detail::zippy &, + SmallVector &, ArrayRef &, + ArrayRef &> + ZippedRelativeLocations = + zip(ItA->RelativeBlockLocations, ItB->RelativeBlockLocations, + OperValsA, OperValsB); + if (!all_of(ZippedRelativeLocations, + [&A, &B](std::tuple R) { + BasicBlock *ABB = static_cast(std::get<2>(R)); + BasicBlock *BBB = static_cast(std::get<3>(R)); + bool AContained = false; + bool BContained = false; + DenseSet BasicBlockA; + DenseSet BasicBlockB; + A.getBasicBlocks(BasicBlockA); + B.getBasicBlocks(BasicBlockB); + if (BasicBlockA.find(ABB) != BasicBlockA.end()) + AContained = true; + if (BasicBlockB.find(BBB) != BasicBlockB.end()) + BContained = true; + if (AContained != BContained) + return false; + if (AContained) + return std::get<0>(R) == std::get<1>(R); + return true; + })) + return false; + } } return true; } @@ -665,6 +740,8 @@ std::vector IntegerMappingForModule; // Iterate over the functions in the module to map each Instruction in each // BasicBlock to an unsigned integer. + Mapper.initializeForBBs(M); + for (Function &F : M) { if (F.empty()) @@ -672,15 +749,16 @@ for (BasicBlock &BB : F) { - if (BB.sizeWithoutDebug() < 2) - continue; - // BB has potential to have similarity since it has a size greater than 2 // and can therefore match other regions greater than 2. Map it to a list // of unsigned integers. Mapper.convertToUnsignedVec(BB, InstrListForModule, IntegerMappingForModule); } + + BasicBlock::iterator It = F.begin()->end(); + Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, + true); } // Insert the InstrListForModule at the end of the overall InstrList so that @@ -715,6 +793,8 @@ std::vector &CandsForRepSubstring) { unsigned StringLen = RS.Length; + if (StringLen < 2) + return; // Create an IRSimilarityCandidate for instance of this subsequence \p RS. for (const unsigned &StartIdx : RS.StartIndices) { @@ -914,6 +994,7 @@ void IRSimilarityIdentifier::findCandidates( std::vector &InstrList, std::vector &IntegerMapping) { + SuffixTree ST(IntegerMapping); std::vector CandsForRepSubstring; @@ -951,6 +1032,7 @@ std::vector InstrList; std::vector IntegerMapping; + Mapper.InstClassifier.EnableBranches = EnableBranches; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -960,6 +1042,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { resetSimilarityCandidates(); + Mapper.InstClassifier.EnableBranches = EnableBranches; std::vector InstrList; std::vector IntegerMapping; Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -726,22 +726,8 @@ ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); } -// In most cases, the illegal instructions we are collecting don't require any -// sort of setup. In these cases, we can just only have illegal instructions, -// and the mapper will create 0 length vectors, and we can check that. - -// In cases where we have legal instructions needed to set up the illegal -// instruction, to check illegal instructions are assigned unsigned integers -// from the maximum value decreasing to 0, it will be greater than a legal -// instruction that comes after. So to check that we have an illegal -// instruction, we place a legal instruction after an illegal instruction, and -// check that the illegal unsigned integer is greater than the unsigned integer -// of the legal instruction. - -// Checks that the branch is mapped to be illegal since there is extra checking -// needed to ensure that a branch in one region is branching to an isomorphic -// location in a different region. -TEST(IRInstructionMapper, BranchIllegal) { +// Checks that the branch is mapped to legal when the option is set. +TEST(IRInstructionMapper, BranchLegal) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { bb0: @@ -759,12 +745,28 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(0)); + ASSERT_EQ(UnsignedVec.size(), static_cast(3)); + ASSERT_TRUE(UnsignedVec[1] > UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]); } +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. + +// In cases where we have legal instructions needed to set up the illegal +// instruction, to check illegal instructions are assigned unsigned integers +// from the maximum value decreasing to 0, it will be greater than a legal +// instruction that comes after. So to check that we have an illegal +// instruction, we place a legal instruction after an illegal instruction, and +// check that the illegal unsigned integer is greater than the unsigned integer +// of the legal instruction. + // Checks that a PHINode is mapped to be illegal since there is extra checking // needed to ensure that a branch in one region is bin an isomorphic // location in a different region. @@ -1445,8 +1447,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a memcpy instruction is mapped to an illegal value. @@ -1476,8 +1478,10 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + errs() << UnsignedVec[1] << " " << UnsignedVec[2] << " " << UnsignedVec[3] << "\n"; + ASSERT_TRUE(UnsignedVec[2] > UnsignedVec[3]); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a memmove instruction is mapped to an illegal value. @@ -1507,8 +1511,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a variable argument instructions are mapped to an illegal value. @@ -1555,11 +1559,10 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(16)); - ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); - ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); - ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); - ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); + ASSERT_EQ(UnsignedVec.size(), static_cast(17)); + ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[10]); + ASSERT_TRUE(UnsignedVec[16] < UnsignedVec[13]); } // Check the length of adding two illegal instructions one after th other. We @@ -1599,21 +1602,24 @@ // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. static bool longSimCandCompare(std::vector &InstrList, - bool Structure = false) { + bool Structure = false, + unsigned Length=2, + unsigned StartIdxOne=0, + unsigned StartIdxTwo=3) { std::vector::iterator Start, End; Start = InstrList.begin(); End = InstrList.begin(); - std::advance(End, 1); - IRSimilarityCandidate Cand1(0, 2, *Start, *End); + std::advance(End, StartIdxOne + Length - 1); + IRSimilarityCandidate Cand1(StartIdxOne, Length, *Start, *End); Start = InstrList.begin(); End = InstrList.begin(); - std::advance(Start, 3); - std::advance(End, 4); - IRSimilarityCandidate Cand2(3, 2, *Start, *End); + std::advance(Start, StartIdxTwo); + std::advance(End, StartIdxTwo + Length - 1); + IRSimilarityCandidate Cand2(StartIdxTwo, Length, *Start, *End); if (Structure) return IRSimilarityCandidate::compareStructure(Cand1, Cand2); return IRSimilarityCandidate::isSimilar(Cand1, Cand2); @@ -2083,6 +2089,206 @@ ASSERT_TRUE(longSimCandCompare(InstrList, true)); } +// Checks that the same structure is recognized between two candidates when +// the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, SameBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 5, 0, 6)); +} + +// Checks that the different structure is recognized between two candidates, +// when the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(18)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList, true, 6, 0, 9)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, SameBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(15)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + // Checks that two sets of identical instructions are found to be the same. // Both sequences of adds have the same operand ordering, and the same // instructions, making them strcturally equivalent.