Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- /dev/null +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -0,0 +1,367 @@ +//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Interface file for the IRSimilarityIdentifier for identifying similarities in +// IR including the IRInstructionMapper, which maps an Instruction to unsigned +// integers. +// +// Two sequences of instructions are called "similar" if they perform the same +// series of operations for all inputs. +// +// \code +// %1 = add i32 %a, 10 +// %2 = add i32 %a, %1 +// %3 = icmp slt icmp %1, %2 +// \endcode +// +// and +// +// \code +// %1 = add i32 11, %a +// %2 = sub i32 %a, %1 +// %3 = icmp sgt icmp %2, %1 +// \endcode +// +// ultimately have the same result, even if the inputs, and structure are +// slightly different. +// +// For instructions, we do not worry about operands that do not have fixed +// semantic meaning to the program. We consider the opcode that the instruction +// has, the types, parameters, and extra information such as the function name, +// or comparison predicate. These are used to create a hash to map instructions +// to integers to be used in similarity matching in sequences of instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H +#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H + +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { +namespace IRSimilarity { + +/// This represents what is and is not supported when finding similarity in +/// Instructions. +/// +/// Legal Instructions are considered when looking at similarity between +/// Instructions. +/// +/// Illegal Instructions cannot be considered when looking for similarity +/// between Instructions. They act as boundaries between similarity regions. +/// +/// Invisible Instructions are skipped over during analysis. +// TODO: Shared with MachineOutliner +enum InstrType { Legal, Illegal, Invisible }; + +/// This provides the utilities for hashing an Instruction to an unsigned +/// integer. Two IRInstructionDatas produce the same hash value when their +/// underlying Instructions perform the same operation (even if they don't have +/// the same input operands.) +/// As a more concrete example, consider the following: +/// +/// \code +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// \endcode +/// +// Then the IRInstructionData wrappers for these Instructions may be hashed like +/// so: +/// +/// \code +/// ; These two adds have the same types and operand types, so they hash to the +/// ; same number. +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d ; Hash: 1 +/// ; This add produces an i64. This differentiates it from %add1 and %add2. So, +/// ; it hashes to a different number. +/// %add3 = add i64 %e, %f; Hash: 2 +/// \endcode +/// +/// +/// This hashing scheme will be used to represent the program as a very long +/// string. This string can then be placed in a data structure which can be used +/// for similarity queries. +/// +/// TODO: Handle types of Instructions which can be equal even with different +/// operands. (E.g. comparisons with swapped predicates.) +/// TODO: Handle CallInsts, which are only checked for function type +/// by \ref isSameOperationAs. +/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the +/// exact same, and some do not. +struct IRInstructionData : ilist_node { + + /// The source Instruction that is being wrapped. + Instruction *Inst = nullptr; + /// The values of the operands in the Instruction. + SmallVector OperVals; + /// The legality of the wrapped instruction. This is informed by InstrType, + /// and is used when checking when two instructions are considered similar. + /// If either instruction is not legal, the instructions are automatically not + /// considered similar. + bool Legal; + + /// Gather the information that is difficult to gather for an Instruction, or + /// is changed. i.e. the operands of an Instruction and the Types of those + /// operands. This extra information allows for similarity matching to make + /// assertions that allow for more flexibility when checking for whether an + /// Instruction performs the same operation. + IRInstructionData(Instruction &I, bool Legality); + + /// Hashes \p Value based on its opcode, types, and operand types. + /// Two IRInstructionData instances produce the same hash when they perform + /// the same operation. + /// + /// As a simple example, consider the following instructions. + /// + /// \code + /// %add1 = add i32 %x1, %y1 + /// %add2 = add i32 %x2, %y2 + /// + /// %sub = sub i32 %x1, %y1 + /// + /// %add_i64 = add i64 %x2, %y2 + /// \endcode + /// + /// Because the first two adds operate the same types, and are performing the + /// same action, they will be hashed to the same value. + /// + /// However, the subtraction instruction is not the same as an addition, and + /// will be hashed to a different value. + /// + /// Finally, the last add has a different type compared to the first two add + /// instructions, so it will also be hashed to a different value that any of + /// the previous instructions. + /// + /// \param [in] Value - The IRInstructionData instance to be hashed. + /// \returns A hash_value of the IRInstructionData. + friend hash_code hash_value(const IRInstructionData &ID) { + SmallVector OperTypes; + for (Value *V : ID.OperVals) + OperTypes.push_back(V->getType()); + + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } +}; + +/// Compare one IRInstructionData class to another IRInstructionData class for +/// whether they are performing a the same operation, and can mapped to the +/// same value. For regular instructions if the hash value is the same, then +/// they will also be close. +/// +/// \param A - The first IRInstructionData class to compare +/// \param B - The second IRInstructionData class to compare +/// \returns true if \p A and \p B are similar enough to be mapped to the same +/// value. +bool isClose(const IRInstructionData &A, const IRInstructionData &B); + +struct IRInstructionDataTraits : DenseMapInfo { + static inline IRInstructionData *getEmptyKey() { return nullptr; } + static inline IRInstructionData *getTombstoneKey() { + return reinterpret_cast(-1); + } + + static unsigned getHashValue(const IRInstructionData *E) { + using llvm::hash_value; + assert(E && "IRInstructionData is a nullptr?"); + return hash_value(*E); + } + + static bool isEqual(const IRInstructionData *LHS, + const IRInstructionData *RHS) { + if (RHS == getEmptyKey() || RHS == getTombstoneKey() || + LHS == getEmptyKey() || LHS == getTombstoneKey()) + return LHS == RHS; + + assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); + return isClose(*LHS, *RHS); + } +}; + +/// Helper struct for converting the Instructions in a Module into a vector of +/// unsigned integers. This vector of unsigned integers can be thought of as a +/// "numeric string". This numeric string can then be queried by, for example, +/// data structures that find repeated substrings. +/// +/// This hashing is done per BasicBlock in the module. To hash Instructions +/// based off of their operations, each Instruction is wrapped in an +/// IRInstructionData struct. The unsigned integer for an IRInstructionData +/// depends on: +/// - The hash provided by the IRInstructionData. +/// - Which member of InstrType the IRInstructionData is classified as. +// See InstrType for more details on the possible classifications, and how they +// manifest in the numeric string. +/// +/// The numeric string for an individual BasicBlock is terminated by an unique +/// unsigned integer. This prevents data structures which rely on repetition +/// from matching across BasicBlocks. (For example, the SuffixTree.) +/// As a concrete example, if we have the following two BasicBlocks: +/// \code +/// bb0: +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// bb1: +/// %sub = sub i32 %c, %d +/// \endcode +/// We may hash the Instructions like this (via IRInstructionData): +/// \code +/// bb0: +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d; Hash: 1 +/// %add3 = add i64 %e, %f; Hash: 2 +/// bb1: +/// %sub = sub i32 %c, %d; Hash: 3 +/// %add4 = add i32 %c, %d ; Hash: 1 +/// \endcode +/// And produce a "numeric string representation" like so: +/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 +/// +/// TODO: This is very similar to the MachineOutliner, and should be +/// consolidated into the same interface. +struct IRInstructionMapper { + /// The starting illegal instruction number to map to. + /// + /// Set to -3 for compatibility with DenseMapInfo. + unsigned IllegalInstrNumber = static_cast(-3); + + /// The next available integer to assign to a legal Instruction to. + unsigned LegalInstrNumber = 0; + + /// Correspondence from IRInstructionData to unsigned integers. + DenseMap + InstructionIntegerMap; + + /// Set if we added an illegal number in the previous step. + /// Since each illegal number is unique, we only need one of them between + /// each range of legal numbers. This lets us make sure we don't add more + /// than one illegal number per range. + bool AddedIllegalLastTime = false; + + /// Marks whether we found a illegal instruction in the previous step. + bool CanCombineWithPrevInstr = false; + + /// Marks whether we have found a set of instructions that is long enough + /// to be considered for similarity. + bool HaveLegalRange = false; + + /// This allocator pointer is in charge of holding on to the IRInstructionData + /// so it is not deallocated until whatever external tool is using it is done + /// with the information. + SpecificBumpPtrAllocator *InstDataAllocator = nullptr; + + /// Get an allocated IRInstructionData struct using the InstDataAllocator. + /// + /// \param I - The Instruction to wrap with IRInstructionData. + /// \param Legality - A boolean value that is true if the instruction is to + /// be considered for similarity, and false if not. + /// \returns An allocated IRInstructionData struct. + IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality); + + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers + /// determined by \p InstrType. Two Instructions are mapped to the same value + /// if they are close as defined by the InstructionData class above. + /// + /// \param [in] BB - The BasicBlock to be mapped to integers. + /// \param [in,out] InstrList - Vector of IRInstructionData to append to. + /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. + void convertToUnsignedVec(BasicBlock &BB, + std::vector &InstrList, + std::vector &IntegerMapping); + + /// Maps an Instruction to a legal integer. + /// + /// \param [in] It - The Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrList - Vector of InstructionData to append + /// to. \returns The integer \p It was mapped to. + unsigned mapToLegalUnsigned(BasicBlock::iterator &It, + std::vector &IntegerMappingForBB, + std::vector &InstrListForBB); + + /// Maps an Instruction to an illegal integer. + /// + /// \param [in] It - The \p Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrList - Vector of IRInstructionData to append to. + /// \param End - true if creating a dummy IRInstructionData at the end of a + /// basic block. + /// \returns The integer \p It was mapped to. + unsigned mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB, bool End = false); + + IRInstructionMapper(SpecificBumpPtrAllocator *IDA) + : InstDataAllocator(IDA) { + // Make sure that the implementation of DenseMapInfo hasn't + // changed. + assert(DenseMapInfo::getEmptyKey() == static_cast(-1) && + "DenseMapInfo's empty key isn't -1!"); + assert(DenseMapInfo::getTombstoneKey() == + static_cast(-2) && + "DenseMapInfo's tombstone key isn't -2!"); + } + + /// Custom InstVisitor to classify different instructions for whether it can + /// be analyzed for similarity. + struct InstructionClassification + : public InstVisitor { + InstructionClassification() {} + + // TODO: Determine a scheme to resolve when the label is similar enough. + InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + // TODO: Determine a scheme to resolve when the labels are similar enough. + InstrType visitPHINode(PHINode &PN) { return Illegal; } + // TODO: Handle allocas. + InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } + // We exclude variable argument instructions since variable arguments + // requires extra checking of the argument list. + InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } + // We exclude all exception handling cases since they are so context + // dependent. + InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } + InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } + // DebugInfo should be included in the regions, but should not be + // analyzed for similarity as it has no bearing on the outcome of the + // program. + InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } + // TODO: Handle GetElementPtrInsts + InstrType visitGetElementPtrInst(GetElementPtrInst &GEPI) { + return Illegal; + } + // TODO: Handle specific intrinsics. + InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + // TODO: Handle CallInsts. + InstrType visitCallInst(CallInst &CI) { return Illegal; } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } + // TODO: Handle interblock similarity. + InstrType visitTerminator(Instruction &I) { return Illegal; } + InstrType visitInstruction(Instruction &I) { return Legal; } + }; + + /// Maps an Instruction to a member of InstrType. + InstructionClassification InstClassifier; +}; + +} // end namespace IRSimilarity +} // end namespace llvm + +#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H Index: llvm/lib/Analysis/CMakeLists.txt =================================================================== --- llvm/lib/Analysis/CMakeLists.txt +++ llvm/lib/Analysis/CMakeLists.txt @@ -54,6 +54,7 @@ GlobalsModRef.cpp GuardUtils.cpp HeatUtils.cpp + IRSimilarityIdentifier.cpp IVDescriptors.cpp IVUsers.cpp IndirectCallPromotionAnalysis.cpp Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -0,0 +1,156 @@ +//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Implementation file for the IRSimilarityIdentifier for identifying +// similarities in IR including the IRInstructionMapper. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/User.h" + +using namespace llvm; +using namespace IRSimilarity; + +IRInstructionData::IRInstructionData(Instruction &I, bool Legality) + : Inst(&I), Legal(Legality) { + // Here we collect the operands to be used to determine whether two + // instructions are similar to one another. + for (Use &OI : I.operands()) + OperVals.push_back(OI.get()); +} + +bool IRSimilarity::isClose(const IRInstructionData &A, + const IRInstructionData &B) { + return A.Legal && A.Inst->isSameOperationAs(B.Inst); +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +void IRInstructionMapper::convertToUnsignedVec( + BasicBlock &BB, std::vector &InstrList, + std::vector &IntegerMapping) { + BasicBlock::iterator It = BB.begin(); + + std::vector IntegerMappingForBB; + std::vector InstrListForBB; + + HaveLegalRange = false; + CanCombineWithPrevInstr = false; + AddedIllegalLastTime = true; + + for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { + switch (InstClassifier.visit(*It)) { + case InstrType::Legal: + mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Illegal: + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Invisible: + AddedIllegalLastTime = false; + break; + } + } + + if (HaveLegalRange) { + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + InstrList.insert(InstrList.end(), InstrListForBB.begin(), + InstrListForBB.end()); + IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForBB.begin(), + IntegerMappingForBB.end()); + } +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToLegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB) { + // We added something legal, so we should unset the AddedLegalLastTime + // flag. + AddedIllegalLastTime = false; + + // If we have at least two adjacent legal instructions (which may have + // invisible instructions in between), remember that. + if (CanCombineWithPrevInstr) + HaveLegalRange = true; + CanCombineWithPrevInstr = true; + + // Get the integer for this instruction or give it the current + // LegalInstrNumber. + IRInstructionData *ID = allocateIRInstructionData(*It, true); + InstrListForBB.push_back(ID); + + // Add to the instruction list + bool WasInserted; + DenseMap::iterator + ResultIt; + std::tie(ResultIt, WasInserted) = + InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); + unsigned INumber = ResultIt->second; + + // There was an insertion. + if (WasInserted) + LegalInstrNumber++; + + IntegerMappingForBB.push_back(INumber); + + // Make sure we don't overflow or use any integers reserved by the DenseMap. + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(LegalInstrNumber != DenseMapInfo::getEmptyKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + + return INumber; +} + +IRInstructionData * +IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality) { + return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality); +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB, bool End) { + // Can't combine an illegal instruction. Set the flag. + CanCombineWithPrevInstr = false; + + // Only add one illegal number per range of legal numbers. + if (AddedIllegalLastTime) + return IllegalInstrNumber; + + IRInstructionData *ID = nullptr; + if (!End) + ID = allocateIRInstructionData(*It, false); + InstrListForBB.push_back(ID); + + // Remember that we added an illegal number last time. + AddedIllegalLastTime = true; + unsigned INumber = IllegalInstrNumber; + IntegerMappingForBB.push_back(IllegalInstrNumber--); + + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(IllegalInstrNumber != DenseMapInfo::getEmptyKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + assert(IllegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + return INumber; +} Index: llvm/unittests/Analysis/CMakeLists.txt =================================================================== --- llvm/unittests/Analysis/CMakeLists.txt +++ llvm/unittests/Analysis/CMakeLists.txt @@ -29,6 +29,7 @@ DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp FunctionPropertiesAnalysisTest.cpp + IRSimilarityIdentifierTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp LoadsTest.cpp Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -0,0 +1,1177 @@ +//===- IRSimilarityIdentifierTest.cpp - IRSimilarityIdentifier unit tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Tests for components for finding similarity such as the instruction mapper, +// suffix tree usage, and structural analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace IRSimilarity; + +static std::unique_ptr makeLLVMModule(LLVMContext &Context, + StringRef ModuleStr) { + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(ModuleStr, Err, Context); + assert(M && "Bad LLVM IR?"); + return M; +} + +void getVectors(Module &M, std::vector &InstrList, + std::vector &UnsignedVec) { + SpecificBumpPtrAllocator InstDataAllocator; + IRInstructionMapper Mapper(&InstDataAllocator); + + for (Function &F : M) + for (BasicBlock &BB : F) + Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec); +} + +// Checks that different opcodes are mapped to different values. +TEST(IRInstructionMapper, OpcodeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = mul i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check that the size of the unsigned vector and the instruction list are the + // same as a safety check. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + // Make sure that the unsigned vector is the expected size. + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are not mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same opcodes and types are mapped to the same values. +TEST(IRInstructionMapper, OpcodeTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same opcode and different types are mapped to different +// values. +TEST(IRInstructionMapper, TypeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) { + bb0: + %0 = add i32 %a, %b + %1 = add i64 %c, %d + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that different predicates map to different values. +TEST(IRInstructionMapper, PredicateDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sge i32 %b, %a + %1 = icmp slt i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that predicates with the same swapped predicate map to different +// values. +TEST(IRInstructionMapper, PredicateIsomorphism) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sgt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value. +TEST(IRInstructionMapper, PredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp slt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value for floating point +// CmpInsts. +TEST(IRInstructionMapper, FPPredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp olt double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the different predicate maps to a different value for floating +// point CmpInsts. +TEST(IRInstructionMapper, FPPredicatDifference) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp oge double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the zexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, ZextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the sexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, SextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the zexts that have the different type parameters map to the +// different unsigned integers. +TEST(IRInstructionMapper, ZextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + + +// Checks that the sexts that have the different type parameters map to the +// different unsigned integers. +TEST(IRInstructionMapper, SextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, LoadSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i64, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different aligns are mapped to different +// unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load volatile i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, StoreSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a + store i32 2, i32* %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + store i32 1, i32* %a + store i64 1, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different aligns are mapped to different +// unsigned integers. +TEST(IRInstructionMapper, StoreDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a, align 4 + store i32 1, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the same volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store volatile i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. + +// In cases where we have legal instructions needed to set up the illegal +// instruction, to check illegal instructions are assigned unsigned integers +// from the maximum value decreasing to 0, it will be greater than a legal +// instruction that comes after. So to check that we have an illegal +// instruction, we place a legal instruction after an illegal instruction, and +// check that the illegal unsigned integer is greater than the unsigned integer +// of the legal instruction. + +// Checks that the branch is mapped to be illegal since there is extra checking +// needed to ensure that a branch in one region is branching to an isomorphic +// location in a different region. +TEST(IRInstructionMapper, BranchIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp slt i32 %a, %b + br i1 %0, label %bb0, label %bb1 + bb1: + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a PHINode is mapped to be illegal since there is extra checking +// needed to ensure that a branch in one region is bin an isomorphic +// location in a different region. +TEST(IRInstructionMapper, PhiIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ] + ret i32 0 + bb1: + ret i32 1 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an alloca instruction is mapped to be illegal. +TEST(IRInstructionMapper, AllocaIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = alloca i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an getelementptr instruction is mapped to be illegal. There is +// extra checking required for the parameters if a getelementptr has more than +// two operands. +TEST(IRInstructionMapper, GetElementPtrIllegal) { + StringRef ModuleString = R"( + %struct.RT = type { i8, [10 x [20 x i32]], i8 } + %struct.ST = type { i32, double, %struct.RT } + define i32 @f(%struct.ST* %s, i32 %a, i32 %b) { + bb0: + %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a call instruction is mapped to be illegal. We have to perform +// extra checks to ensure that both the name and function type are the same. +TEST(IRInstructionMapper, CallIllegal) { + StringRef ModuleString = R"( + declare i32 @f1(i32, i32) + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = call i32 @f1(i32 %a, i32 %b) + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an invoke instruction is mapped to be illegal. Invoke +// instructions are considered to be illegal because of the change in the +// control flow that is currently not recognized. +TEST(IRInstructionMapper, InvokeIllegal) { + StringRef ModuleString = R"( + define i32 @f(i8 *%gep1, i32 %b) { + then: + invoke i32 undef(i8* undef) + to label %invoke unwind label %lpad + + invoke: + unreachable + + lpad: + landingpad { i8*, i32 } + catch i8* null + unreachable + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an callbr instructions are considered to be illegal. Callbr +// instructions are considered to be illegal because of the change in the +// control flow that is currently not recognized. +TEST(IRInstructionMapper, CallBrInstIllegal) { + StringRef ModuleString = R"( + define void @test() { + fail: + ret void + } + + define i32 @f(i32 %a, i32 %b) { + bb0: + callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %a, i8* blockaddress(@test, %fail)) to label %normal [label %fail] + fail: + ret i32 0 + normal: + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an debuginfo intrinsics are mapped to be invisible. Since they +// do not semantically change the program, they can be recognized as similar. +TEST(IRInstructionMapper, DebugInfoInvisible) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + then: + %0 = add i32 %a, %b + call void @llvm.dbg.value(metadata !0) + %1 = add i32 %a, %b + ret i32 0 + } + + declare void @llvm.dbg.value(metadata) + !0 = distinct !{!"test\00", i32 10})"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(3)); +} + +// The following are all exception handling intrinsics. We do not currently +// handle these instruction because they are very context dependent. + +// Checks that an eh.typeid.for intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) { + StringRef ModuleString = R"( + @_ZTIi = external constant i8* + define i32 @f() { + then: + %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + ret i32 0 + } + + declare i32 @llvm.eh.typeid.for(i8*))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.exceptioncode intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + %0 = catchswitch within none [label %__except] unwind to caller + + __except: + %1 = catchpad within %0 [i8* null] + catchret from %1 to label %__except + + then: + %2 = call i32 @llvm.eh.exceptioncode(token %1) + ret i32 0 + } + + declare i32 @llvm.eh.exceptioncode(token))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.unwind intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + call void @llvm.eh.unwind.init() + ret i32 0 + } + + declare void @llvm.eh.unwind.init())"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.exceptionpointer intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + %0 = call i8* @llvm.eh.exceptionpointer.p0i8(i32 0) + ret i32 0 + } + + declare i8* @llvm.eh.exceptionpointer.p0i8(i32))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a catchpad instruction is mapped to an illegal value. +TEST(IRInstructionMapper, CatchpadIllegal) { + StringRef ModuleString = R"( + declare void @llvm.donothing() nounwind readnone + + define void @function() personality i8 3 { + entry: + invoke void @llvm.donothing() to label %normal unwind label %exception + exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + catchpad1: + catchpad within %cs1 [] + br label %normal + normal: + ret void + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a cleanuppad instruction is mapped to an illegal value. +TEST(IRInstructionMapper, CleanuppadIllegal) { + StringRef ModuleString = R"( + declare void @llvm.donothing() nounwind readnone + + define void @function() personality i8 3 { + entry: + invoke void @llvm.donothing() to label %normal unwind label %exception + exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + catchpad1: + %clean = cleanuppad within none [] + br label %normal + normal: + ret void + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// The following three instructions are memory transfer and setting based, which +// are considered illegal since is extra checking needed to handle the address +// space checking. + +// Checks that a memset instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemSetIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a memcpy instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemCpyIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a memmove instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemMoveIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a variable argument instructions are mapped to an illegal value. +// We exclude variable argument instructions since variable arguments +// requires extra checking of the argument list. +TEST(IRInstructionMapper, VarArgsIllegal) { + StringRef ModuleString = R"( + declare void @llvm.va_start(i8*) + declare void @llvm.va_copy(i8*, i8*) + declare void @llvm.va_end(i8*) + + define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { + entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + %0 = va_arg i8** %ap, i32 + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + call void @llvm.va_copy(i8* %v, i8* %ap1) + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(16)); + ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); + ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); + ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); + ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); +} + +// Check the length of adding two illegal instructions one after th other. We +// should find that only one element is added for each illegal range. +TEST(IRInstructionMapper, RepeatedIllegalLength) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = mul i32 %a, %b + %2 = call i32 @f(i32 %a, i32 %b) + %3 = call i32 @f(i32 %a, i32 %b) + %4 = add i32 %a, %b + %5 = mul i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check that the size of the unsigned vector and the instruction list are the + // same as a safety check. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + // Make sure that the unsigned vector is the expected size. + ASSERT_TRUE(UnsignedVec.size() == 6); +}