Index: Analysis/diagnostics/sarif-check.py =================================================================== --- /dev/null +++ Analysis/diagnostics/sarif-check.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +import sys +import subprocess +import traceback +import json + +testfile = sys.argv[1] +with open(sys.argv[2]) as datafh: + data = json.load(datafh) + +prefix = "CHECK: " + +def sarifLogFirstResult(idx): + return data['runs'][0]['results'][idx] + +def threadFlow(idx): + return data['runs'][0]['results'][0]['codeFlows'][0]['threadFlows'][idx] + +fails = 0 +passes = 0 +with open(testfile) as testfh: + lineno = 0 + for line in iter(testfh.readline, ""): + lineno += 1 + line = line.rstrip("\r\n") + try: + prefix_pos = line.index(prefix) + except ValueError: + continue + check_expr = line[prefix_pos + len(prefix):] + + try: + exception = None + result = eval(check_expr, {"sarifLog":data, + "result":sarifLogFirstResult, + "flow":threadFlow}) + except Exception: + result = False + exception = traceback.format_exc().splitlines()[-1] + + if exception is not None: + sys.stderr.write( + "{file}:{line:d}: check threw exception: {expr}\n" + "{file}:{line:d}: exception was: {exception}\n".format( + file=testfile, line=lineno, + expr=check_expr, exception=exception)) + fails += 1 + elif not result: + sys.stderr.write( + "{file}:{line:d}: check returned False: {expr}\n".format( + file=testfile, line=lineno, expr=check_expr)) + fails += 1 + else: + passes += 1 + +if fails != 0: + sys.exit("{} checks failed".format(fails)) +else: + sys.stdout.write("{} checks passed\n".format(passes)) + Index: Analysis/diagnostics/sarif-diagnostics-taint-test.c =================================================================== --- /dev/null +++ Analysis/diagnostics/sarif-diagnostics-taint-test.c @@ -0,0 +1,32 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify -analyzer-output=sarif -o %t.sarif | %python %S/sarif-check.py %s %t.sarif +#include "../Inputs/system-header-simulator.h" + +int atoi(const char *nptr); + +void f(void) { + char s[80]; + scanf("%s", s); + int d = atoi(s); // expected-warning {{tainted}} +} + +int main(void) { + f(); + return 0; +} + +// Test the basics for sanity. +// CHECK: sarifLog['version'].startswith("2.0.0") +// CHECK: sarifLog['runs'][0]['tool']['fullName'] == "clang static analyzer" +// CHECK: sarifLog['runs'][0]['tool']['name'] == "clang" +// CHECK: sarifLog['runs'][0]['tool']['language'] == "en-US" + +// Test the specifics of this taint test. +// CHECK: len(result(0)['codeFlows'][0]['threadFlows'][0]['locations']) == 2 +// CHECK: flow(0)['locations'][0]['step'] == 1 +// CHECK: flow(0)['locations'][0]['importance'] == "essential" +// CHECK: flow(0)['locations'][0]['location']['message']['text'] == "Calling 'f'" +// CHECK: flow(0)['locations'][0]['location']['physicalLocation']['region']['startLine'] == 13 +// CHECK: flow(0)['locations'][1]['step'] == 2 +// CHECK: flow(0)['locations'][1]['importance'] == "essential" +// CHECK: flow(0)['locations'][1]['location']['message']['text'] == "tainted" +// CHECK: flow(0)['locations'][1]['location']['physicalLocation']['region']['startLine'] == 9 Index: StaticAnalyzer/Core/CMakeLists.txt =================================================================== --- StaticAnalyzer/Core/CMakeLists.txt +++ StaticAnalyzer/Core/CMakeLists.txt @@ -40,6 +40,7 @@ MemRegion.cpp PathDiagnostic.cpp PlistDiagnostics.cpp + SarifDiagnostics.cpp ProgramState.cpp RangeConstraintManager.cpp RangedConstraintManager.cpp Index: StaticAnalyzer/Core/SarifDiagnostics.cpp =================================================================== --- /dev/null +++ StaticAnalyzer/Core/SarifDiagnostics.cpp @@ -0,0 +1,266 @@ +//===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SarifDiagnostics object. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace clang; +using namespace ento; + +namespace { +class SarifDiagnostics : public PathDiagnosticConsumer { + std::string OutputFile; + +public: + SarifDiagnostics(AnalyzerOptions &, const std::string &Output) + : OutputFile(Output) {} + ~SarifDiagnostics() override = default; + + void FlushDiagnosticsImpl(std::vector &Diags, + FilesMade *FM) override; + + StringRef getName() const override { return "SarifDiagnostics"; } + PathGenerationScheme getGenerationScheme() const override { return Sarif; } + bool supportsLogicalOpControlFlow() const override { return true; } + bool supportsCrossFileDiagnostics() const override { return true; } +}; +} // end anonymous namespace + +void ento::createSarifDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts, + PathDiagnosticConsumers &C, + const std::string &Output, + const Preprocessor &) { + C.push_back(new SarifDiagnostics(AnalyzerOpts, Output)); +} + +static StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} + +static std::string fileNameToURI(StringRef Filename) { + llvm::SmallString<32> Ret = "file://"; + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. + Ret += Root.drop_front(2).str(); + } else { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + // Add the rest of the path components, encoding any reserved characters. + std::for_each(std::next(sys::path::begin(Filename)), sys::path::end(Filename), + [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with + // Windows native paths for the initial backslash following + // the drive component, which we need to ignore as a URI path + // part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the + // one being currently processed. + Ret += "/"; + + // URI encode the part. + for (char C : Component) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. + if (llvm::isAlnum(C) || + StringRef::npos != + StringRef("-._~:@!$&'()*+,;=").find(C)) { + Ret += C; + } else { + Ret += "%" + llvm::toHex(StringRef(&C, 1)); + } + } + }); + + return Ret.str().str(); +} + +static json::Object createFileLocation(const FileEntry &FE) { + return json::Object{{"uri", fileNameToURI(getFileName(FE))}}; +} + +static json::Object createFile(const FileEntry &FE) { + return json::Object{{"fileLocation", createFileLocation(FE)}, + {"roles", json::Array{"resultFile"}}, + {"length", FE.getSize()}, + {"mimeType", "text/plain"}}; +} + +static json::Object createFileLocation(const FileEntry &FE, + json::Object &Files) { + std::string FileURI = fileNameToURI(getFileName(FE)); + if (!Files.get(FileURI)) + Files[FileURI] = createFile(FE); + + return json::Object{{"uri", FileURI}}; +} + +static json::Object createTextRegion(SourceRange R, const SourceManager &SM) { + return json::Object{ + {"startLine", SM.getExpansionLineNumber(R.getBegin())}, + {"endLine", SM.getExpansionLineNumber(R.getEnd())}, + {"startColumn", SM.getExpansionColumnNumber(R.getBegin())}, + {"endColumn", SM.getExpansionColumnNumber(R.getEnd())}}; +} + +static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE, + const SourceManager &SMgr, + json::Object &Files) { + return json::Object{{{"fileLocation", createFileLocation(FE, Files)}, + {"region", createTextRegion(R, SMgr)}}}; +} + +enum class Importance { Important, Essential, Unimportant }; + +static StringRef importanceToStr(Importance I) { + switch (I) { + case Importance::Important: + return "important"; + case Importance::Essential: + return "essential"; + case Importance::Unimportant: + return "unimportant"; + } + llvm_unreachable("Fully covered switch is not so fully covered"); +} + +static json::Object createThreadFlowLocation(int Step, json::Object &&Location, + Importance I) { + return json::Object{{"step", Step}, + {"location", std::move(Location)}, + {"importance", importanceToStr(I)}}; +} + +static json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +static json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +static Importance calculateImportance(const PathDiagnosticPiece &Piece) { + StringRef PieceStr = Piece.getString(); + + switch (Piece.getKind()) { + case PathDiagnosticPiece::Kind::Call: + case PathDiagnosticPiece::Kind::Macro: + case PathDiagnosticPiece::Kind::Note: + // FIXME: What should be reported here? + break; + case PathDiagnosticPiece::Kind::Event: + return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important + : Importance::Essential; + case PathDiagnosticPiece::Kind::ControlFlow: + return Importance::Unimportant; + } + return Importance::Unimportant; +} + +static json::Object createThreadFlow(const PathPieces &Pieces, + json::Object &Files) { + const SourceManager &SMgr = Pieces.front()->getLocation().getManager(); + int Step = 1; + json::Array Locations; + for (const auto &Piece : Pieces) { + const PathDiagnosticLocation &P = Piece->getLocation(); + Locations.push_back(createThreadFlowLocation( + Step++, + createLocation(createPhysicalLocation(P.asRange(), + *P.asLocation().getFileEntry(), + SMgr, Files), + Piece->getString()), + calculateImportance(*Piece))); + } + return json::Object{{"locations", std::move(Locations)}}; +} + +static json::Object createCodeFlow(const PathPieces &Pieces, + json::Object &Files) { + return json::Object{ + {"threadFlows", json::Array{createThreadFlow(Pieces, Files)}}}; +} + +static json::Object createTool() { + return json::Object{{"name", "clang"}, + {"fullName", "clang static analyzer"}, + {"language", "en-US"}, + {"version", getClangFullVersion()}}; +} + +static json::Object createResult(const PathDiagnostic &Diag, + json::Object &Files) { + const PathPieces &Path = Diag.path.flatten(false); + const SourceManager &SMgr = Path.front()->getLocation().getManager(); + + return json::Object{ + {"message", createMessage(Diag.getVerboseDescription())}, + {"codeFlows", json::Array{createCodeFlow(Path, Files)}}, + {"locations", + json::Array{createLocation(createPhysicalLocation( + Diag.getLocation().asRange(), + *Diag.getLocation().asLocation().getFileEntry(), SMgr, Files))}}, + {"ruleId", Diag.getCheckName()}}; +} + +static json::Object createRun(std::vector &Diags) { + json::Array Results; + json::Object Files; + + llvm::for_each(Diags, [&](const PathDiagnostic *D) { + Results.push_back(createResult(*D, Files)); + }); + + return json::Object{{"tool", createTool()}, + {"results", std::move(Results)}, + {"files", std::move(Files)}}; +} + +void SarifDiagnostics::FlushDiagnosticsImpl( + std::vector &Diags, FilesMade *) { + // FIXME: if the file already exists, do we overwrite it with a single run, + // or do we append a run into the file if it's a valid SARIF log? + std::error_code EC; + llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::F_Text); + if (EC) { + llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; + return; + } + json::Object Sarif{{"$schema", "http://json.schemastore.org/sarif-2.0.0"}, + {"version", "2.0.0-beta.2018-09-26"}, + {"runs", json::Array{createRun(Diags)}}}; + OS << llvm::formatv("{0:2}", json::Value(std::move(Sarif))); +} Index: clang/StaticAnalyzer/Core/Analyses.def =================================================================== --- clang/StaticAnalyzer/Core/Analyses.def +++ clang/StaticAnalyzer/Core/Analyses.def @@ -33,6 +33,7 @@ ANALYSIS_DIAGNOSTICS(PLIST, "plist", "Output analysis results using Plists", createPlistDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(PLIST_MULTI_FILE, "plist-multi-file", "Output analysis results using Plists (allowing for multi-file bugs)", createPlistMultiFileDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(PLIST_HTML, "plist-html", "Output analysis results using HTML wrapped with Plists", createPlistHTMLDiagnosticConsumer) +ANALYSIS_DIAGNOSTICS(SARIF, "sarif", "Output analysis results in a SARIF file", createSarifDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(TEXT, "text", "Text output of analysis results", createTextPathDiagnosticConsumer) #ifndef ANALYSIS_PURGE Index: clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h =================================================================== --- clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h +++ clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h @@ -123,6 +123,9 @@ /// Used for plist output, used for "arrows" generation. Extensive, + + /// Used for SARIF output. + Sarif, }; virtual PathGenerationScheme getGenerationScheme() const { return Minimal; }