Index: llvm/test/tools/llvm-sim/Inputs/sim1.ll =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/Inputs/sim1.ll @@ -0,0 +1,27 @@ +define void @similar_func1() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @similar_func2() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} \ No newline at end of file Index: llvm/test/tools/llvm-sim/fail-cases.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/fail-cases.test @@ -0,0 +1,8 @@ +# RUN: not llvm-sim %s 2>&1 | FileCheck %s +# RUN: not llvm-sim %s.2 2>&1 | FileCheck %s -check-prefix=EXIST + +# File reading error messaging tests. + +# CHECK: error: unable to read module [[FILE:.*]] + +# EXIST: error: Source file [[FILE:.*]].2 doesn't exist \ No newline at end of file Index: llvm/test/tools/llvm-sim/single-sim.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-sim/single-sim.test @@ -0,0 +1,57 @@ +# RUN: llvm-sim -o %t %S/Inputs/sim1.ll +# RUN: cat %t | FileCheck %s + +# Checking the output of a single module test. 
+ +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } Index: llvm/tools/LLVMBuild.txt =================================================================== --- llvm/tools/LLVMBuild.txt +++ llvm/tools/LLVMBuild.txt @@ -51,6 +51,7 @@ llvm-rc llvm-reduce llvm-rtdyld + llvm-sim llvm-size llvm-split llvm-undname Index: llvm/tools/llvm-sim/CMakeLists.txt =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS core support object coverage AsmParser IRReader TransformUtils ipo) + +add_llvm_tool(llvm-sim + llvm-sim.cpp + FindSimilarities.cpp + JSONExporter.cpp +) \ No newline at end of file Index: llvm/tools/llvm-sim/FindSimilarities.h =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/FindSimilarities.h @@ -0,0 +1,89 @@ +//===-- FindSimilarities.h --------------------------------------*- C++ 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface file for the SimilarityFinder for llvm-sim. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SIM_SIMILARITYFINDER_H +#define LLVM_SIM_SIMILARITYFINDER_H + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +using namespace IRSimilarity; + +/// The helper class that parses a single bitcode or IR file, and uses the +/// IRSimilarityIdentifier to find the similarity present within that file. +class SimilarityFinderTool { + +public: + SimilarityFinderTool(raw_ostream &OS, std::string &Path); + ~SimilarityFinderTool(); + + /// Parse the file at SourcePath, and return the SimilarityGroups found in + /// the resulting Module. + /// \returns The list of different SimilarityGroups produced by the + /// IRSimilarityIdentifier. If the file could not be read, getFailure() is + /// nonzero afterwards and no analysis is run on it. + SimilarityGroupList &run(); + + /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. + /// + /// \param I the Instruction to find the instruction number for. + /// \returns The instruction number for \p I if it exists, None otherwise. + Optional<unsigned> getPositionInModule(Instruction *I) const { + assert(I && "Instruction is nullptr!"); + DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I); + if (It == LLVMInstNum.end()) + return None; + return It->second; + } + + int getFailure() { return FailedReadingFiles; } + + raw_ostream &error() const; + raw_ostream &warn() const; + +private: + /// Initial checking for whether there exists a regular file at the \p Path. + /// \param Path the path to check for a regular file. 
+ /// \returns nonzero if there was an error finding the \p Path. + int collectPath(const std::string &Path); + + /// Parse the file at SourcePath into \ref ModuleToAnalyze, and give each + /// Instruction in it a sequential number. + void parseBitcodeFile(); + + /// Stream for printing diagnostic info. + raw_ostream &OS; + + /// The similarity identifier we will use to find the similar sections. + IRSimilarityIdentifier SimIdent; + + /// Overall context for the module parsing. + LLVMContext CurrContext; + + /// The Module read from the SourcePath given. + std::unique_ptr<Module> ModuleToAnalyze; + + /// The absolute path to the bitcode/IR file to analyze. + std::string SourcePath; + + /// Nonzero if there was an error locating or reading the input file. + int FailedReadingFiles = 0; + + /// Mapping from an Instruction pointer to its occurrence in a sequential + /// list of all the Instructions in a Module. + DenseMap<Instruction *, unsigned> LLVMInstNum; +}; + +} // namespace llvm + +#endif // LLVM_SIM_SIMILARITYFINDER_H \ No newline at end of file Index: llvm/tools/llvm-sim/FindSimilarities.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/FindSimilarities.cpp @@ -0,0 +1,104 @@ +//===-- FindSimilarities.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation file for the similarity finder for llvm-sim. 
+// +//===----------------------------------------------------------------------===// + +#include "FindSimilarities.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#include // NOTE(review): header name is missing here; confirm the intended header. + +using namespace llvm; +using namespace IRSimilarity; + +SimilarityFinderTool::SimilarityFinderTool(raw_ostream &S, + std::string &Path) + : OS(S) { + + // Validate the input path and store its absolute form; nonzero means failure. + FailedReadingFiles = collectPath(Path); +} + +SimilarityFinderTool::~SimilarityFinderTool() {} + +int SimilarityFinderTool::collectPath(const std::string &Path) { + sys::fs::file_status Status; + sys::fs::status(Path, Status); + // Make sure that the file given exists. + if (!sys::fs::exists(Status)) { + error() << "Source file " << Path << " doesn't exist\n"; + return 1; + } + + // We can only process bitcode or IR files, and they must be regular files + // in this case. + if (!sys::fs::is_regular_file(Status)) { + error() << "ignoring " << Path + << ", bitcode or IR file must be a regular file\n"; + return 1; + } + + // Find the absolute path for the source file. + SmallString<128> EffectivePath(Path); + if (std::error_code EC = sys::fs::make_absolute(EffectivePath)) { + error() << "finding absolute path for " << Path << ": " << EC.message() + << "\n"; + return 1; + } + + sys::path::remove_dots(EffectivePath, /*remove_dot_dots=*/true); + SourcePath = EffectivePath.str().str(); + return 0; +} + +void SimilarityFinderTool::parseBitcodeFile() { + // Parse the bitcode or IR file at SourcePath to get the module. 
+ SMDiagnostic Err; + ModuleToAnalyze = parseIRFile(SourcePath, Err, CurrContext); + if(!ModuleToAnalyze) { + error() << "unable to read module " << SourcePath << "\n"; + FailedReadingFiles = 2; // collectPath failures use 1. + return; + } + + // We give each instruction a number (starting at 1), which gives us a start + // and end value for the beginning and end of each IRSimilarityCandidate. + unsigned InstructionNumber = 1; + for (Function &F : *ModuleToAnalyze) + for (BasicBlock &BB : F) + for (Instruction &I : BB) + LLVMInstNum.insert(std::make_pair(&I, InstructionNumber++)); +} + +SimilarityGroupList &SimilarityFinderTool::run() { + + parseBitcodeFile(); + if (FailedReadingFiles) + return SimIdent.getSimilarity(); + + // Find the similarities within the parsed module. + return SimIdent.findSimilarity(*ModuleToAnalyze); +} + +raw_ostream &SimilarityFinderTool::error() const { + return WithColor::error(OS); +} +raw_ostream &SimilarityFinderTool::warn() const { + return WithColor::warning(OS); +} Index: llvm/tools/llvm-sim/JSONExporter.h =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/JSONExporter.h @@ -0,0 +1,50 @@ +//===-- JSONExporter.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface file for the JSON Exporter for Similarity Candidates for +// llvm-sim. +// +// The JSONExporter accepts the SimilarityGroups organized by which module they +// are in and exports it to a JSON file. 
+// +// If a module has one group of similar sections at instructions 4 to 9 and +// 5 to 10, and another group with similarity at 11 to 15 and 16 to 20, the JSON +// output would be: +// +// { +// "1": [{"start": 4, "end": 9}, {"start": 5, "end": 10}], +// "2": [{"start": 11, "end": 15}, {"start": 16, "end": 20}] +// } +// +//===----------------------------------------------------------------------===// + +#include "FindSimilarities.h" + +namespace llvm { + +/// Helper class that writes the found similarity out as JSON. +class JSONExporter { + +public: + + JSONExporter() {} + ~JSONExporter() {} + + /// Exports the given SimilarityGroups to a JSON file at \p FilePath. + /// + /// \param FilePath - The path to the output location. + /// \param SimSections - The similarity groups to process. + /// \param SimTool - The tool that holds the diagnostic file stream and the + /// Instruction to location mapping. + /// \returns 0 on success, or a nonzero error code if opening the file failed. + int exportToFile(const std::string &FilePath, + const SimilarityGroupList &SimSections, + const SimilarityFinderTool &SimTool); +}; + +} // namespace llvm \ No newline at end of file Index: llvm/tools/llvm-sim/JSONExporter.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/JSONExporter.cpp @@ -0,0 +1,69 @@ +//===-- JSONExporter.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation file for the JSON Exporter for Similarity Candidates for +// llvm-sim. 
+// +//===----------------------------------------------------------------------===// + +#include "FindSimilarities.h" +#include "JSONExporter.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" + +using namespace llvm; +using namespace IRSimilarity; + +int JSONExporter::exportToFile( + const std::string &FilePath, + const SimilarityGroupList &SimSections, + const SimilarityFinderTool &SimTool) { + std::error_code E; + raw_fd_ostream JSONFileStream(FilePath, E, + sys::fs::FA_Read | sys::fs::FA_Write); + + if (E) { + SimTool.error() << E.message() << "\n"; + return E.value(); + } + + json::OStream J(JSONFileStream, 1); + J.objectBegin(); + + unsigned SimOption = 1; + // Emit one attribute per SimilarityGroup, keyed by a counter starting at 1. + for (const SimilarityGroup &G : SimSections) { + std::string SimOptionStr = std::to_string(SimOption); + J.attributeBegin(SimOptionStr); + J.arrayBegin(); + // Each candidate in the group contributes the range of instruction + // numbers where the similarity exists. + for (const IRSimilarityCandidate &C : G) { + Optional<unsigned> Start = SimTool.getPositionInModule((*C.front()).Inst); + Optional<unsigned> End = SimTool.getPositionInModule((*C.back()).Inst); + + assert(Start.hasValue() && + "Could not find instruction number for first instruction"); + assert(End.hasValue() && + "Could not find instruction number for last instruction"); + + J.object([&] { + J.attribute("start", Start.getValue()); + J.attribute("end", End.getValue()); + }); + } + J.arrayEnd(); + J.attributeEnd(); + SimOption++; + } + J.objectEnd(); + + JSONFileStream.close(); + + return 0; +} \ No newline at end of file Index: llvm/tools/llvm-sim/LLVMBuild.txt =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./tools/llvm-sim/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-sim +parent = Tools +required_libraries = Core Support AsmParser IRReader IPO \ No newline at end of file Index: llvm/tools/llvm-sim/llvm-sim.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-sim/llvm-sim.cpp @@ -0,0 +1,50 @@ +//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program finds similar sections of a Module, and exports them as a JSON +// file. +// +// To find similarities contained across multiple modules, please use llvm-link +// first to merge the modules. 
+// +//===----------------------------------------------------------------------===// + +#include "FindSimilarities.h" +#include "JSONExporter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output Filename"), cl::init("sim.json"), + cl::value_desc("filename")); + +static cl::opt<std::string> InputSourceFile( + cl::Positional, cl::desc(""), cl::Required); + +int main(int argc, const char *argv[]) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); + + SimilarityFinderTool SimTool(errs(), InputSourceFile); + int E = SimTool.getFailure(); + if (E != 0) + return E; + + const SimilarityGroupList &SimilaritySections = SimTool.run(); + E = SimTool.getFailure(); + if (E != 0) + return E; + + JSONExporter JE; + E = JE.exportToFile(OutputFilename, SimilaritySections, SimTool); + return E; +} \ No newline at end of file