Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -110,7 +110,8 @@ /// by \ref isSameOperationAs. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the /// exact same, and some do not. -struct IRInstructionData : ilist_node { +struct IRInstructionData + : ilist_node> { /// The source Instruction that is being wrapped. Instruction *Inst = nullptr; @@ -127,12 +128,23 @@ /// to a less than form. It is None otherwise. Optional RevisedPredicate; + /// This structure holds the distances of how far "ahead of" or "behind" the + /// target blocks of a branch, or the incoming blocks of a phi nodes are. + /// If the value is negative, it means that the block was registered before + /// the block of this instruction in terms of blocks in the function. + SmallVector RelativeBlockLocations; + /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make /// assertions that allow for more flexibility when checking for whether an /// Instruction performs the same operation. IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + IRInstructionData(Instruction *I, bool Legality, IRInstructionDataList &IDL); + + /// The shared code needed to fill in the data stuctures for IRInstrucitonData + /// when it is constructed from a reference or a pointer. + void initializeInstruction(); /// Get the predicate that the compare instruction is using for hashing the /// instruction. the IRInstructionData must be wrapping a CmpInst. @@ -145,6 +157,16 @@ /// \return the consistent comparison predicate. static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + /// For an IRInstructionData containing a branch, finds the + /// relative distances from the source basic block to the target by taking + /// the difference of the number assigned to the current basic block and the + /// target basic block of the branch. + /// + /// \param BasicBlockToInteger - The mapping of basic blocks to their location + /// in the module. + void + setBranchSuccessors(DenseMap &BasicBlockToInteger); + /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. @@ -198,7 +220,8 @@ IRInstructionDataList *IDL = nullptr; }; -struct IRInstructionDataList : simple_ilist {}; +struct IRInstructionDataList + : simple_ilist> {}; /// Compare one IRInstructionData class to another IRInstructionData class for /// whether they are performing a the same operation, and can mapped to the @@ -288,6 +311,10 @@ DenseMap InstructionIntegerMap; + /// A mapping for a basic block in a module to its assigned number/location + /// in the module. + DenseMap BasicBlockToInteger; + /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more @@ -321,6 +348,17 @@ /// \returns An allocated IRInstructionData struct. IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + + /// Get an allocated IRInstructionData struct using the InstDataAllocator. + /// + /// \param I - The Instruction to wrap with IRInstructionData as a pointer. + /// \param Legality - A boolean value that is true if the instruction is to + /// be considered for similarity, and false if not. + /// \param IDL - The InstructionDataList that the IRInstructionData is + /// inserted into. + /// \returns An allocated IRInstructionData struct. + IRInstructionData *allocateIRInstructionData(Instruction *I, bool Legality, + IRInstructionDataList &IDL); /// Get an allocated IRInstructionDataList object using the IDLAllocator. /// @@ -329,6 +367,24 @@ IRInstructionDataList *IDL = nullptr; + /// Assigns values to all the basic blocks in function \p F starting from + /// integer \p BBNumber. + /// + /// \param F - The function containing the basic blocks to assign numbers to. + /// \param BBNumber - The number to start from. + void initializeForBBs(Function &F, unsigned &BBNumber) { + for (BasicBlock &BB : F) + BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); + } + + /// Assigns values to all the basic blocks in Module \p M. + /// \param M - The module containing the basic blocks to assign numbers to. + void initializeForBBs(Module &M) { + unsigned BBNumber = 0; + for (Function &F : M) + initializeForBBs(F, BBNumber); + } + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. @@ -386,7 +442,11 @@ InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. - InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + InstrType visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return Legal; + return Illegal; + } // TODO: Determine a scheme to resolve when the labels are similar enough. InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. @@ -419,6 +479,10 @@ // TODO: Handle interblock similarity. InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } + + // The flag variable that lets the classifier know whether we should + // allow branches to be checked for similarity. + bool EnableBranches = false; }; /// Maps an Instruction to a member of InstrType. @@ -546,6 +610,18 @@ DenseMap> &ValueNumberMapping; }; + struct RelativeLocMapping { + /// The IRSimilarityCandidate that holds the instruction the relative + /// location was pulled from. + const IRSimilarityCandidate &IRSC; + + /// The relative location to be analyzed. + int RelativeLocation; + + // The corresponding value. + Value *OperVal; + }; + /// Compare the operands in \p A and \p B and check that the current mapping /// of global value numbers from \p A to \p B and \p B to \A is consistent. /// @@ -569,6 +645,18 @@ static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B); + /// Compare the relative locations in \p A and \p B and check that the + /// distances match if both locations are contained in the region, and that + /// the branches both point outside the region if they do not. + /// + /// \param A - The first IRInstructionCandidate, relative location value, + /// and incoming block. + /// \param B - The second IRInstructionCandidate, relative location value, + /// and incoming block. + /// \returns true if the relative locations match. + static bool checkRelativeLocations(RelativeLocMapping A, + RelativeLocMapping B); + /// Create a mapping from the value numbering to a different separate set of /// numbers. This will serve as a guide for relating one candidate to another. /// The canonical number gives use the ability identify which global value @@ -596,6 +684,31 @@ DenseMap> &ToSourceMapping, DenseMap> &FromSourceMapping); + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + } + } + + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \param [in,out] BBList - A list in order of use to track the basic blocks. + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + void getBasicBlocks(DenseSet &BBSet, + std::vector &BBList) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + BBList.push_back(BB); + } + } + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -727,8 +840,9 @@ /// analyzing the module. class IRSimilarityIdentifier { public: - IRSimilarityIdentifier() - : Mapper(&InstDataAllocator, &InstDataListAllocator) {} + IRSimilarityIdentifier(bool MatchBranches = true) + : Mapper(&InstDataAllocator, &InstDataListAllocator), + EnableBranches(MatchBranches) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -804,6 +918,10 @@ /// instance of IRInstructionData. IRInstructionMapper Mapper; + /// The flag variable that marks whether we should check branches for + /// similarity, or only look within basic blocks. + bool EnableBranches = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -23,13 +23,23 @@ using namespace llvm; using namespace IRSimilarity; +cl::opt + DisableBranches("no-ir-sim-branch-matching", cl::init(false), + cl::ReallyHidden, + cl::desc("disable similarity matching, and outlining, " + "across branches for debugging purposes.")); + IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) : Inst(&I), Legal(Legality), IDL(&IDList) { + initializeInstruction(); +} + +void IRInstructionData::initializeInstruction() { // We check for whether we have a comparison instruction. If it is, we // find the "less than" version of the predicate for consistency for // comparison instructions throught the program. - if (CmpInst *C = dyn_cast(&I)) { + if (CmpInst *C = dyn_cast(Inst)) { CmpInst::Predicate Predicate = predicateForConsistency(C); if (Predicate != C->getPredicate()) RevisedPredicate = Predicate; @@ -37,8 +47,8 @@ // Here we collect the operands and their types for determining whether // the structure of the operand use matches between two different candidates. - for (Use &OI : I.operands()) { - if (isa(I) && RevisedPredicate.hasValue()) { + for (Use &OI : Inst->operands()) { + if (isa(Inst) && RevisedPredicate.hasValue()) { // If we have a CmpInst where the predicate is reversed, it means the // operands must be reversed as well. OperVals.insert(OperVals.begin(), OI.get()); @@ -49,6 +59,37 @@ } } +IRInstructionData::IRInstructionData(Instruction *I, bool Legality, + IRInstructionDataList &IDList) + : Inst(I), Legal(Legality), IDL(&IDList) { + if (I) + initializeInstruction(); +} + +void IRInstructionData::setBranchSuccessors( + DenseMap &BasicBlockToInteger) { + assert(isa(Inst) && "Instruction must be branch"); + + BranchInst *BI = cast(Inst); + DenseMap::iterator BBNumIt; + + BBNumIt = BasicBlockToInteger.find(BI->getParent()); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find location for BasicBlock!"); + + int CurrentBlockNumber = static_cast(BBNumIt->second); + + for (BasicBlock *Successor : BI->successors()) { + BBNumIt = BasicBlockToInteger.find(Successor); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find number for BasicBlock!"); + int OtherBlockNumber = static_cast(BBNumIt->second); + + int Relative = OtherBlockNumber - CurrentBlockNumber; + RelativeBlockLocations.push_back(Relative); + } +} + CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { switch (CI->getPredicate()) { case CmpInst::FCMP_OGT: @@ -143,6 +184,10 @@ return false; } + if (isa(A.Inst) && isa(B.Inst) && + A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) + return false; + return true; } @@ -156,10 +201,6 @@ std::vector IntegerMappingForBB; std::vector InstrListForBB; - HaveLegalRange = false; - CanCombineWithPrevInstr = false; - AddedIllegalLastTime = true; - for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { switch (InstClassifier.visit(*It)) { case InstrType::Legal: @@ -175,7 +216,8 @@ } if (HaveLegalRange) { - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + if (AddedIllegalLastTime) + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); for (IRInstructionData *ID : InstrListForBB) this->IDL->push_back(*ID); llvm::append_range(InstrList, InstrListForBB); @@ -203,6 +245,9 @@ IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); InstrListForBB.push_back(ID); + if (isa(*It)) + ID->setBranchSuccessors(BasicBlockToInteger); + // Add to the instruction list bool WasInserted; DenseMap::iterator @@ -235,6 +280,12 @@ return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); } +IRInstructionData * +IRInstructionMapper::allocateIRInstructionData(Instruction *I, bool Legality, + IRInstructionDataList &IDL) { + return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); +} + IRInstructionDataList * IRInstructionMapper::allocateIRInstructionDataList() { return new (IDLAllocator->Allocate()) IRInstructionDataList(); @@ -255,6 +306,8 @@ IRInstructionData *ID = nullptr; if (!End) ID = allocateIRInstructionData(*It, false, *IDL); + else + ID = allocateIRInstructionData(nullptr, false, *IDL); InstrListForBB.push_back(ID); // Remember that we added an illegal number last time. @@ -563,6 +616,23 @@ return true; } +bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, + RelativeLocMapping B) { + BasicBlock *ABB = static_cast(A.OperVal); + BasicBlock *BBB = static_cast(B.OperVal); + DenseSet BasicBlockA; + DenseSet BasicBlockB; + A.IRSC.getBasicBlocks(BasicBlockA); + B.IRSC.getBasicBlocks(BasicBlockB); + bool AContained = BasicBlockA.find(ABB) != BasicBlockA.end(); + bool BContained = BasicBlockB.find(BBB) != BasicBlockB.end(); + if (AContained != BContained) + return false; + if (AContained) + return A.RelativeLocation == B.RelativeLocation; + return true; +} + bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B) { DenseMap> MappingA; @@ -639,6 +709,42 @@ {A, OperValsA, ValueNumberMappingA}, {B, OperValsB, ValueNumberMappingB})) return false; + + // Here we check that between two corresponding branch instructions, + // when the branch targets a basic block within the same region, the + // relative locations are the same, and otherwise, are targeting basic + // blocks outside the region at the same locations. + + // We are able to make the assumption about blocks outside of the region + // since the target block labels are considered values and will follow the + // same number matching that we defined for the other instructions in the + // region. So, at this point, in each location we target a specific block + // outside the region, we are targeting a corresponding block in each + // analagous location in the region we are comparing to. + if (!(isa(ItA->Inst) && isa(ItB->Inst)) && + !(isa(ItA->Inst) && isa(ItB->Inst))) + continue; + + if (ItA->RelativeBlockLocations.size() != + ItB->RelativeBlockLocations.size()) + return false; + + if (OperValsA.size() != OperValsB.size()) + return false; + + detail::zippy &, + SmallVector &, ArrayRef &, + ArrayRef &> + ZippedRelativeLocations = + zip(ItA->RelativeBlockLocations, ItB->RelativeBlockLocations, + OperValsA, OperValsB); + if (!all_of(ZippedRelativeLocations, + [&A, &B](std::tuple R) { + return checkRelativeLocations( + {A, std::get<0>(R), std::get<2>(R)}, + {B, std::get<1>(R), std::get<3>(R)}); + })) + return false; } return true; } @@ -664,6 +770,8 @@ std::vector IntegerMappingForModule; // Iterate over the functions in the module to map each Instruction in each // BasicBlock to an unsigned integer. + Mapper.initializeForBBs(M); + for (Function &F : M) { if (F.empty()) @@ -671,15 +779,18 @@ for (BasicBlock &BB : F) { - if (BB.sizeWithoutDebug() < 2) - continue; - // BB has potential to have similarity since it has a size greater than 2 // and can therefore match other regions greater than 2. Map it to a list // of unsigned integers. Mapper.convertToUnsignedVec(BB, InstrListForModule, IntegerMappingForModule); } + + BasicBlock::iterator It = F.begin()->end(); + Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, + true); + if (InstrListForModule.size() > 0) + Mapper.IDL->push_back(*InstrListForModule.back()); } // Insert the InstrListForModule at the end of the overall InstrList so that @@ -714,6 +825,8 @@ std::vector &CandsForRepSubstring) { unsigned StringLen = RS.Length; + if (StringLen < 2) + return; // Create an IRSimilarityCandidate for instance of this subsequence \p RS. for (const unsigned &StartIdx : RS.StartIndices) { @@ -954,6 +1067,7 @@ std::vector InstrList; std::vector IntegerMapping; + Mapper.InstClassifier.EnableBranches = this->EnableBranches; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -963,6 +1077,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { resetSimilarityCandidates(); + Mapper.InstClassifier.EnableBranches = this->EnableBranches; std::vector InstrList; std::vector IntegerMapping; @@ -983,7 +1098,7 @@ } bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { - IRSI.reset(new IRSimilarityIdentifier()); + IRSI.reset(new IRSimilarityIdentifier(!DisableBranches)); return false; } @@ -999,9 +1114,9 @@ AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, - ModuleAnalysisManager &) { + ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(); + auto IRSI = IRSimilarityIdentifier(!DisableBranches); IRSI.findSimilarity(M); return IRSI; } Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1381,10 +1381,19 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. - if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; - return !InstructionClassifier.visit(ID.Inst); + IRInstructionDataList::iterator IDIt = std::next(ID.getIterator()); + Instruction *NextInst = nullptr; + if (!ID.Inst->isTerminator()) + NextInst = ID.Inst->getNextNonDebugInstruction(); + else if (IDIt->Inst != nullptr) + NextInst = + &*IDIt->Inst->getParent()->instructionsWithoutDebug().begin(); + + if (IDIt->Inst != nullptr) + if (IDIt->Inst != NextInst) + return true; + + return !this->InstructionClassifier.visit(ID.Inst); }); } @@ -1436,9 +1445,18 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. - if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; + IRInstructionDataList::iterator IDIt = std::next(ID.getIterator()); + Instruction *NextInst = nullptr; + if (!ID.Inst->isTerminator()) + NextInst = ID.Inst->getNextNonDebugInstruction(); + else if (IDIt->Inst != nullptr) + NextInst = + &*IDIt->Inst->getParent()->instructionsWithoutDebug().begin(); + + if (IDIt->Inst != nullptr) + if (IDIt->Inst != NextInst) + return true; + return !this->InstructionClassifier.visit(ID.Inst); }); Index: llvm/test/Transforms/IROutliner/illegal-branches.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-branches.ll +++ llvm/test/Transforms/IROutliner/illegal-branches.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -no-ir-sim-branch-matching -ir-outlining-no-cost < %s | FileCheck %s ; Show that we do not extract sections with branches as it would require extra ; label and control flow checking. Index: llvm/test/Transforms/IROutliner/illegal-catchpad.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-catchpad.ll +++ llvm/test/Transforms/IROutliner/illegal-catchpad.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -no-ir-sim-branch-matching -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that catchpad instructions are not outlined even if they ; in a similar section. Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/illegal-cleanup.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-cleanup.ll +++ llvm/test/Transforms/IROutliner/illegal-cleanup.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -no-ir-sim-branch-matching -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that cleanuppad instructions are not outlined even if they ; in a similar section. Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/illegal-landingpad.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-landingpad.ll +++ llvm/test/Transforms/IROutliner/illegal-landingpad.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -no-ir-sim-branch-matching -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that landingpad instructions are not outlined even if they ; in a similar section. Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/region-end-of-module.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/region-end-of-module.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we do not faul when there is a similarity group with +; an ending instruction that is also the end of the module. + +@a = global i8* null + +define void @foo() { +entry: + br label %for.cond1 + +for.cond1: + br label %for.body + +for.body: + %inc = add nsw i32 2, 1 + br label %for.cond1 + +for.end: + %inc3 = add nsw i32 2, 1 + br label %for.cond1 +} +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND1:%.*]] +; CHECK: for.cond1: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 2, 1 +; CHECK-NEXT: br label [[FOR_COND1]] +; CHECK: for.end: +; CHECK-NEXT: [[INC3:%.*]] = add nsw i32 2, 1 +; CHECK-NEXT: br label [[FOR_COND1]] +; Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -41,7 +41,7 @@ void getSimilarities( Module &M, std::vector> &SimilarityCandidates) { - IRSimilarityIdentifier Identifier; + IRSimilarityIdentifier Identifier(false); SimilarityCandidates = Identifier.findSimilarity(M); } @@ -726,22 +726,8 @@ ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); } -// In most cases, the illegal instructions we are collecting don't require any -// sort of setup. In these cases, we can just only have illegal instructions, -// and the mapper will create 0 length vectors, and we can check that. - -// In cases where we have legal instructions needed to set up the illegal -// instruction, to check illegal instructions are assigned unsigned integers -// from the maximum value decreasing to 0, it will be greater than a legal -// instruction that comes after. So to check that we have an illegal -// instruction, we place a legal instruction after an illegal instruction, and -// check that the illegal unsigned integer is greater than the unsigned integer -// of the legal instruction. - -// Checks that the branch is mapped to be illegal since there is extra checking -// needed to ensure that a branch in one region is branching to an isomorphic -// location in a different region. -TEST(IRInstructionMapper, BranchIllegal) { +// Checks that the branch is mapped to legal when the option is set. +TEST(IRInstructionMapper, BranchLegal) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { bb0: @@ -759,12 +745,28 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(0)); + ASSERT_EQ(UnsignedVec.size(), static_cast(3)); + ASSERT_TRUE(UnsignedVec[1] > UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]); } +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. + +// In cases where we have legal instructions needed to set up the illegal +// instruction, to check illegal instructions are assigned unsigned integers +// from the maximum value decreasing to 0, it will be greater than a legal +// instruction that comes after. So to check that we have an illegal +// instruction, we place a legal instruction after an illegal instruction, and +// check that the illegal unsigned integer is greater than the unsigned integer +// of the legal instruction. + // Checks that a PHINode is mapped to be illegal since there is extra checking // needed to ensure that a branch in one region is bin an isomorphic // location in a different region. @@ -1445,8 +1447,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } // Checks that a memcpy instruction is mapped to an illegal value. @@ -1476,8 +1478,9 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_GT(UnsignedVec[2], UnsignedVec[3]); + ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } // Checks that a memmove instruction is mapped to an illegal value. @@ -1507,8 +1510,8 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(6)); - ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); + ASSERT_EQ(UnsignedVec.size(), static_cast(7)); + ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } // Checks that a variable argument instructions are mapped to an illegal value. @@ -1555,11 +1558,10 @@ getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); - ASSERT_EQ(UnsignedVec.size(), static_cast(16)); - ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); - ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); - ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); - ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); + ASSERT_EQ(UnsignedVec.size(), static_cast(17)); + ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[0]); + ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[10]); + ASSERT_TRUE(UnsignedVec[16] < UnsignedVec[13]); } // Check the length of adding two illegal instructions one after th other. We @@ -1599,21 +1601,23 @@ // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. static bool longSimCandCompare(std::vector &InstrList, - bool Structure = false) { + bool Structure = false, unsigned Length = 2, + unsigned StartIdxOne = 0, + unsigned StartIdxTwo = 3) { std::vector::iterator Start, End; Start = InstrList.begin(); End = InstrList.begin(); - std::advance(End, 1); - IRSimilarityCandidate Cand1(0, 2, *Start, *End); + std::advance(End, StartIdxOne + Length - 1); + IRSimilarityCandidate Cand1(StartIdxOne, Length, *Start, *End); Start = InstrList.begin(); End = InstrList.begin(); - std::advance(Start, 3); - std::advance(End, 4); - IRSimilarityCandidate Cand2(3, 2, *Start, *End); + std::advance(Start, StartIdxTwo); + std::advance(End, StartIdxTwo + Length - 1); + IRSimilarityCandidate Cand2(StartIdxTwo, Length, *Start, *End); if (Structure) return IRSimilarityCandidate::compareStructure(Cand1, Cand2); return IRSimilarityCandidate::isSimilar(Cand1, Cand2); @@ -2083,6 +2087,206 @@ ASSERT_TRUE(longSimCandCompare(InstrList, true)); } +// Checks that the same structure is recognized between two candidates when +// the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, SameBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 5, 0, 6)); +} + +// Checks that the different structure is recognized between two candidates, +// when the branches target other blocks inside the same region, the relative +// distance between the blocks must be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureInternal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(18)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList, true, 6, 0, 9)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, SameBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(12)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + +// Checks that the same structure is recognized between two candidates, when +// the branches target other blocks outside region, the relative distance +// does not need to be the same. +TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb1 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + } + + define i32 @f2(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + br label %bb2 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + br label %bb2 + bb2: + %4 = add i32 %b, %a + %5 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableBranches = true; + Mapper.initializeForBBs(*M); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(15)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6)); +} + // Checks that two sets of identical instructions are found to be the same. // Both sequences of adds have the same operand ordering, and the same // instructions, making them strcturally equivalent.