Index: llvm/test/CMakeLists.txt =================================================================== --- llvm/test/CMakeLists.txt +++ llvm/test/CMakeLists.txt @@ -109,6 +109,7 @@ llvm-readelf llvm-reduce llvm-rtdyld + llvm-sim llvm-size llvm-split llvm-strings Index: llvm/test/lit.cfg.py =================================================================== --- llvm/test/lit.cfg.py +++ llvm/test/lit.cfg.py @@ -162,7 +162,7 @@ 'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca', 'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump', 'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf', - 'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings', + 'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings', 'llvm-strip', 'llvm-tblgen', 'llvm-undname', 'llvm-c-test', 'llvm-cxxfilt', 'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder', 'bugpoint', 'llc', 'llvm-symbolizer', 'opt', 'sancov', 'sanstats']) Index: llvm/test/tools/llvm-sim/Inputs/sim1.ll =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/Inputs/sim1.ll @@ -0,0 +1,27 @@ +define void @similar_func1() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @similar_func2() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} Index: llvm/test/tools/llvm-sim/fail-cases.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/fail-cases.test @@ -0,0 
+1,8 @@ +# RUN: not llvm-sim %s 2>&1 | FileCheck %s +# RUN: not llvm-sim %s.2 2>&1 | FileCheck -DMSG=%errc_ENOENT %s --check-prefix=EXIST + +# File reading error messaging tests. + +# CHECK: error: expected top-level entity + +# EXIST: error: Could not open input file: [[MSG]] Index: llvm/test/tools/llvm-sim/single-sim-file.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/single-sim-file.test @@ -0,0 +1,57 @@ +# RUN: llvm-sim -o %t %S/Inputs/sim1.ll +# RUN: FileCheck %s < %t + +# Checking the output of a single module test. + +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } Index: llvm/test/tools/llvm-sim/single-sim.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/single-sim.test @@ -0,0 +1,56 @@ +# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s + +# Checking the output of a single module test. 
+ +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } Index: llvm/tools/llvm-sim/CMakeLists.txt =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + Analysis + IRReader) + +add_llvm_tool(llvm-sim + llvm-sim.cpp +) Index: llvm/tools/llvm-sim/llvm-sim.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/llvm-sim.cpp @@ -0,0 +1,149 @@ +//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program finds similar sections of a Module, and exports them as a JSON
+// file.
+//
+// To find similarities contained across multiple modules, please use llvm-link
+// first to merge the modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
+                                           cl::init("-"),
+                                           cl::value_desc("filename"));
+
+static cl::opt<std::string> InputSourceFile(cl::Positional,
+                                            cl::desc(""),
+                                            cl::init("-"),
+                                            cl::value_desc("filename"));
+
+/// Retrieve the unique number \p I was mapped to in \p LLVMInstNum.
+///
+/// \param I - The Instruction to find the instruction number for.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns The instruction number for \p I if it exists, None otherwise.
+Optional<unsigned>
+getPositionInModule(const Instruction *I,
+                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  assert(I && "Instruction is nullptr!");
+  DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
+  if (It == LLVMInstNum.end())
+    return None;
+  return It->second;
+}
+
+/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
+///
+/// \param FilePath - The path to the output location.
+/// \param SimSections - The similarity groups to process.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns A nonzero error code if there was a failure creating the file.
+std::error_code
+exportToFile(const StringRef FilePath,
+             const SimilarityGroupList &SimSections,
+             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  std::error_code EC;
+  ToolOutputFile Out(FilePath, EC, sys::fs::OF_None);
+  if (EC)
+    return EC;
+
+  json::OStream J(Out.os(), /*IndentSize=*/1);
+  J.objectBegin();
+
+  unsigned SimOption = 1;
+  // Process each list of SimilarityGroups organized by the Module.
+  for (const SimilarityGroup &G : SimSections) {
+    std::string SimOptionStr = std::to_string(SimOption);
+    J.attributeBegin(SimOptionStr);
+    J.arrayBegin();
+    // Each candidate in the group contributes one range of instruction
+    // numbers, from its first instruction to its last.
+    for (const IRSimilarityCandidate &C : G) {
+      Optional<unsigned> Start =
+          getPositionInModule((*C.front()).Inst, LLVMInstNum);
+      Optional<unsigned> End =
+          getPositionInModule((*C.back()).Inst, LLVMInstNum);
+
+      assert(Start.hasValue() &&
+             "Could not find instruction number for first instruction");
+      assert(End.hasValue() &&
+             "Could not find instruction number for last instruction");
+
+      J.object([&] {
+        J.attribute("start", Start.getValue());
+        J.attribute("end", End.getValue());
+      });
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+    SimOption++;
+  }
+  J.objectEnd();
+
+  Out.keep();
+
+  return EC;
+}
+
+/// Reads the input IR, numbers its instructions, and exports similar regions.
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
+
+  LLVMContext CurrContext;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> ModuleToAnalyze =
+      parseIRFile(InputSourceFile, Err, CurrContext);
+
+  if (!ModuleToAnalyze) {
+    Err.print(argv[0], errs());
+    return 1;
+  }
+
+  // Mapping from an Instruction pointer to its occurrence in a sequential
+  // list of all the Instructions in a Module.
+  DenseMap<Instruction *, unsigned> LLVMInstNum;
+
+  // We give each instruction a number, which gives us a start and end value
+  // for the beginning and end of each IRSimilarityCandidate.
+  unsigned InstructionNumber = 1;
+  for (Function &F : *ModuleToAnalyze)
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB.instructionsWithoutDebug())
+        LLVMInstNum[&I] = InstructionNumber++;
+
+  // The similarity identifier we will use to find the similar sections.
+  IRSimilarityIdentifier SimIdent;
+  SimilarityGroupList SimilaritySections =
+      SimIdent.findSimilarity(*ModuleToAnalyze);
+
+  std::error_code E =
+      exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
+  if (E) {
+    errs() << argv[0] << ": " << E.message() << '\n';
+    return 2;
+  }
+
+  return 0;
+}