diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Basic/Sarif.h
@@ -0,0 +1,421 @@
+//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file defines SarifDocument, a class for pretty-printing clang
+// diagnostics in the SARIF standard. The document created can be accessed
+// as a JSON Object. This class only implements a valid subset of SARIF, which
+// is limited to conforming to all 'SHALL' / 'MUST' definitions for properties
+// of interest to Clang.
+//
+// A SARIF (Static Analysis Results Interchange Format) document is a JSON
+// document that describes in detail the results of running static analysis
+// tools on a project. Each (non-trivial) document consists of at least one
+// "run", each of which is composed of details such as:
+// * Tool: The tool that was run
+// * Rules: The rules applied during the tool run, represented by
+// \c reportingDescriptor objects in SARIF
+// * Results: The matches for the rules applied against the project(s) being
+// evaluated, represented by \c result objects in SARIF
+//
+// Reference:
+// 1. The SARIF standard
+// 2. SARIFreportingDescriptor
+// 3. SARIFresult
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_BASIC_SARIF_H
+#define CLANG_BASIC_SARIF_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/JSON.h"
+#include
+
+namespace clang {
+
+// NOTE(review): a using-directive at namespace scope in a header takes effect
+// in every file that includes this header. Consider naming the required llvm
+// types explicitly instead.
+using namespace llvm;
+
+// Forward declarations.
+class SarifDocumentWriter;
+class LangOptions;
+class SourceManager;
+class FullSourceRange;
+
+namespace detail {
+
+/// An artifact location is SARIF's way of describing the complete Location
+/// of an artifact encountered during analysis. The \c artifactLocation object
+/// typically consists of a URI, and/or an index to reference the artifact it
+/// locates.
+///
+/// This builder makes an additional assumption: that every artifact encountered
+/// by \c clang will be a physical, top-level artifact. Which is why the static
+/// creation method \ref SarifArtifactLocation::create takes a mandatory URI
+/// parameter. The official standard states that either a \c URI or \c Index
+/// must be available in the object, \c clang picks the \c URI as a reasonable,
+/// arbitrary default.
+///
+/// Reference:
+/// 1. artifactLocation object
+/// 2. \ref SarifArtifact
+class SarifArtifactLocation {
+private:
+  friend class clang::SarifDocumentWriter;
+
+  /// Index used to reference the located artifact; unset until
+  /// \ref setIndex is called.
+  llvm::Optional<uint32_t> Index;
+
+  /// URI of the located artifact. This is a non-owning view: the string it
+  /// refers to must outlive this object and every copy made of it.
+  StringRef URI;
+
+  explicit SarifArtifactLocation(StringRef URI) : Index(), URI(URI) {}
+
+public:
+  /// Create a location for the artifact identified by \p URI.
+  ///
+  /// \param URI The URI of the artifact; only a view of it is stored.
+  static SarifArtifactLocation create(StringRef URI) {
+    return SarifArtifactLocation{URI};
+  }
+
+  /// Set the index used to reference the located artifact.
+  ///
+  /// \return A reference to this object, so that calls can be chained. When
+  /// the chain starts from a temporary, copy the result into a value rather
+  /// than binding it to a reference: the temporary is destroyed at the end of
+  /// the full expression.
+  SarifArtifactLocation &setIndex(uint32_t Idx) {
+    Index = Idx;
+    return *this;
+  }
+};
+
+/// An artifact in SARIF is any object (a sequence of bytes) addressable by
+/// a URI (RFC 3986). The most common type of artifact for clang's use-case
+/// would be source files. SARIF's artifact object is described in detail in
+/// section 3.24.
+///
+/// Since every artifact in clang MUST have a location (there being no nested
+/// artifacts), the creation method \ref SarifArtifact::create requires a
+/// \ref SarifArtifactLocation object.
+///
+/// Reference:
+/// 1. artifact object
+class SarifArtifact {
+private:
+  friend class clang::SarifDocumentWriter;
+
+  /// Optional offset of the artifact; unset until \ref setOffset is called.
+  llvm::Optional<uint32_t> Offset;
+
+  /// Optional length of the artifact in bytes; unset until \ref setLength is
+  /// called.
+  llvm::Optional<size_t> Length;
+
+  /// MIME type of the artifact (non-owning view); empty when not set.
+  StringRef MimeType;
+
+  /// Where the artifact can be found.
+  SarifArtifactLocation Location;
+
+  /// Roles played by the artifact (non-owning views); empty when not set.
+  SmallVector<StringRef, 4> Roles;
+
+  explicit SarifArtifact(const SarifArtifactLocation &Loc)
+      : Offset(), Length(), MimeType(), Location(Loc), Roles() {}
+
+public:
+  /// Create an artifact found at \p Loc. The location is copied.
+  static SarifArtifact create(const SarifArtifactLocation &Loc) {
+    // Construct the artifact directly instead of relying on an implicit
+    // conversion from a copied location object.
+    return SarifArtifact{Loc};
+  }
+
+  /// Set the offset of the artifact. Returns this object for chaining.
+  SarifArtifact &setOffset(uint32_t Offset) {
+    this->Offset = Offset;
+    return *this;
+  }
+
+  /// Set the length of the artifact in bytes. Returns this object for
+  /// chaining.
+  SarifArtifact &setLength(size_t NumBytes) {
+    this->Length = NumBytes;
+    return *this;
+  }
+
+  /// Replace the artifact's roles with \p Roles. Only views of the role
+  /// strings are stored, so they must outlive this object. Returns this
+  /// object for chaining.
+  SarifArtifact &setRoles(const std::initializer_list<StringRef> &Roles) {
+    this->Roles.assign(Roles);
+    return *this;
+  }
+
+  /// Set the MIME type of the artifact. Only a view of \p MimeType is
+  /// stored. Returns this object for chaining.
+  SarifArtifact &setMimeType(StringRef MimeType) {
+    this->MimeType = MimeType;
+    return *this;
+  }
+};
+
+} // namespace detail
+
+/// A thread flow is a sequence of code locations that specify a possible path
+/// through a single thread of execution.
+/// A thread flow in SARIF is related to a code flow which describes
+/// the progress of one or more programs through one or more thread flows.
+///
+/// Reference:
+/// 1. threadFlow object
+/// 2. codeFlow object
+class ThreadFlow {
+  friend class SarifDocumentWriter;
+
+  /// Source range covered by this step of the flow.
+  FullSourceRange Range;
+
+  /// Importance of this step (non-owning view).
+  StringRef Importance;
+
+  /// Message attached to this step (non-owning view).
+  StringRef Message;
+
+  ThreadFlow() = default;
+
+public:
+  /// Create an empty thread flow; populate it with the \c setX(...) methods.
+  ///
+  /// Every setter returns a reference to this object so that calls can be
+  /// chained. The string setters store views only, so the strings passed in
+  /// must outlive the object.
+  static ThreadFlow create() { return {}; }
+
+  /// Set the source range of this step.
+  ThreadFlow &setRange(const FullSourceRange &Range) {
+    this->Range = Range;
+    return *this;
+  }
+
+  /// Set the importance of this step.
+  ThreadFlow &setImportance(StringRef Importance) {
+    this->Importance = Importance;
+    return *this;
+  }
+
+  /// Set the message attached to this step.
+  ThreadFlow &setMessage(StringRef Message) {
+    this->Message = Message;
+    return *this;
+  }
+};
+
+/// A SARIF rule (\c reportingDescriptor object) contains information that
+/// describes a reporting item generated by a tool. A reporting item is
+/// either a result of analysis or notification of a condition encountered by
+/// the tool. Rules are arbitrary but are identifiable by a hierarchical
+/// rule-id.
+///
+/// This builder provides an interface to create SARIF \c reportingDescriptor
+/// objects via the \ref SarifRule::create static method.
+///
+/// Reference:
+/// 1. reportingDescriptor object
+class SarifRule {
+  friend class clang::SarifDocumentWriter;
+
+  // All fields are non-owning views; the strings they refer to must outlive
+  // the rule and every copy of it.
+  StringRef Name;
+  StringRef RuleId;
+  StringRef Description;
+  StringRef HelpURI;
+
+  SarifRule() = default;
+
+public:
+  /// Create an empty rule; populate it with the \c setX(...) methods.
+  ///
+  /// Every setter returns a reference to this object so that calls can be
+  /// chained. When the chain starts from the temporary returned by this
+  /// method, store the result in a value, not in a reference.
+  static SarifRule create() { return {}; }
+
+  /// Set the name of the rule.
+  SarifRule &setName(StringRef Name) {
+    this->Name = Name;
+    return *this;
+  }
+
+  /// Set the identifier of the rule.
+  SarifRule &setRuleId(StringRef RuleId) {
+    this->RuleId = RuleId;
+    return *this;
+  }
+
+  /// Set the description of the rule.
+  SarifRule &setDescription(StringRef Description) {
+    this->Description = Description;
+    return *this;
+  }
+
+  /// Set the help URI of the rule; an empty URI means "none".
+  SarifRule &setHelpURI(StringRef HelpURI) {
+    this->HelpURI = HelpURI;
+    return *this;
+  }
+};
+
+/// A SARIF result (also called a "reporting item") is a unit of output
+/// produced when one of the tool's \c reportingDescriptor encounters a match
+/// on the file being analysed by the tool.
+///
+/// This builder provides a \ref SarifResult::create static method that can be
+/// used to create an empty shell onto which attributes can be added using the
+/// \c setX(...) methods.
+///
+/// For example:
+/// \code{.cpp}
+/// SarifResult result = SarifResult::create()
+/// .setIndex(...)
+/// .setRuleId(...)
+/// .setDiagnosticMessage(...);
+/// \endcode
+///
+/// Reference:
+/// 1. SARIFresult
+class SarifResult {
+  friend class clang::SarifDocumentWriter;
+
+  // NOTE(review): RuleIdx and RuleID are stored here, but the visible
+  // SarifDocumentWriter::appendResult takes the rule index as a separate
+  // argument - confirm which of the two is meant to be authoritative.
+  uint32_t RuleIdx = 0;
+  StringRef RuleID;
+
+  /// Text of the diagnostic (non-owning view).
+  StringRef DiagnosticMessage;
+
+  /// Source ranges the result applies to (non-owning view).
+  ArrayRef<FullSourceRange> Locations;
+
+  /// Thread flows attached to the result (non-owning view).
+  ArrayRef<ThreadFlow> ThreadFlows;
+
+  SarifResult() = default;
+
+public:
+  /// Create an empty result; populate it with the \c setX(...) methods.
+  ///
+  /// Every setter returns a reference to this object so that calls can be
+  /// chained. Strings and arrays are stored as views, so the data passed in
+  /// must outlive the result.
+  static SarifResult create() { return {}; }
+
+  /// Set the index of the rule this result belongs to.
+  SarifResult &setIndex(uint32_t idx) {
+    this->RuleIdx = idx;
+    return *this;
+  }
+
+  /// Set the identifier of the rule this result belongs to.
+  SarifResult &setRuleId(StringRef RuleID) {
+    this->RuleID = RuleID;
+    return *this;
+  }
+
+  /// Set the text of the diagnostic.
+  SarifResult &setDiagnosticMessage(StringRef Message) {
+    this->DiagnosticMessage = Message;
+    return *this;
+  }
+
+  /// Set the source ranges the result applies to.
+  SarifResult &setLocations(ArrayRef<FullSourceRange> DiagLocs) {
+    this->Locations = DiagLocs;
+    return *this;
+  }
+
+  /// Set the thread flows attached to the result.
+  SarifResult &setThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
+    this->ThreadFlows = ThreadFlows;
+    return *this;
+  }
+};
+
+/// This class handles creating a valid SARIF document given various input
+/// attributes. However, it requires an ordering among certain method calls:
+///
+/// 1. Because every SARIF document must contain at least 1 \c run, callers
+/// must ensure that \ref SarifDocumentWriter::createRun is called before
+/// any other methods.
+/// 2. If SarifDocumentWriter::endRun is called, callers MUST call
+/// SarifDocumentWriter::createRun, before invoking any of the result
+/// aggregation methods such as SarifDocumentWriter::appendResult etc.
+class SarifDocumentWriter {
+private:
+  const StringRef SchemaURI{
+      "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/"
+      "sarif-schema-2.1.0.json"};
+  const StringRef SchemaVersion{"2.1.0"};
+
+  /// \internal
+  /// Return a pointer to the current tool. If no run exists, this will
+  /// crash.
+  json::Object *getCurrentTool();
+
+  /// \internal
+  /// Checks if there is a run associated with this document
+  ///
+  /// \return true if at least one run has been created
+  bool hasRun() const;
+
+  /// \internal
+  /// Reset portions of the internal state so that the document is ready to
+  /// receive data for a new run
+  void reset();
+
+  /// \internal
+  /// \brief Return a mutable pointer to the current run, if it exists.
+  ///
+  /// \note If a run does not exist in the SARIF document, calling this will
+  /// trigger undefined behaviour
+  json::Object *currentRun();
+
+  /// Create a code flow object for the given threadflows.
+  /// See \link ThreadFlow \endlink
+  ///
+  /// \note If a run does not exist in the SARIF document, calling this will
+  /// trigger undefined behaviour
+  json::Object createCodeFlow(const ArrayRef<ThreadFlow> &ThreadFlows);
+
+  /// Add the given threadflows to the ones this SARIF document knows about
+  json::Array createThreadFlows(const ArrayRef<ThreadFlow> &ThreadFlows);
+
+  /// Add the given \ref FullSourceRange to the SARIF document as a physical
+  /// location, with its corresponding artifact
+  ///
+  /// \pre
+  /// The writer must have been constructed with language options.
+  json::Object createPhysicalLocation(const FullSourceRange &R);
+
+public:
+  /// Create a new empty SARIF document without language options. Such a
+  /// writer must not be asked to report source locations, because creating
+  /// a physical location reads the language options.
+  SarifDocumentWriter() = default;
+
+  /// Create a new empty SARIF document with the given language options.
+  /// Only the address of \p LangOpts is stored, so it must outlive the
+  /// writer.
+  explicit SarifDocumentWriter(const LangOptions &LangOpts)
+      : LangOpts(&LangOpts) {}
+
+  /// Release resources held by this SARIF document
+  ~SarifDocumentWriter() = default;
+
+  /// Create a new run with which any upcoming analysis will be associated.
+  /// Each run requires specifying the tool that is generating reporting items
+  void createRun(const StringRef &ShortToolName, const StringRef &LongToolName);
+
+  /// If there is a current run, end it. This method collects various
+  /// book-keeping required to clear and close resources associated
+  /// with the current run, but may also allocate some for the next run.
+  ///
+  /// If no run exists, this amounts to a no-op.
+  void endRun();
+
+  /// Create a new rule, and associate it with the current run
+  /// Returns integer rule index for the created rule that is unique within
+  /// the current run
+  ///
+  /// \pre
+  /// There must be a run associated with the document, failing to do so will
+  /// cause undefined behaviour
+  size_t createRule(const StringRef &Name, const StringRef &RuleId,
+                    const StringRef &Description,
+                    const StringRef &HelpURI = "");
+
+  /// Associate the given rule with the current run
+  ///
+  /// \pre
+  /// There must be a run associated with the document, failing to do so will
+  /// cause undefined behaviour
+  size_t createRule(const SarifRule &Rule);
+
+  /// Append a new result to the currently in-flight run.
+  ///
+  /// \pre
+  /// There must be a run associated with the document, failing to do so will
+  /// cause undefined behaviour
+  /// \pre
+  /// \c RuleIdx must correspond to a rule known by the SARIF document. i.e.
+  /// it must be the value returned by a previous call to \ref createRule
+  void appendResult(size_t RuleIdx, const SarifResult &SarifResult);
+
+  /// Return the SARIF document in its current state. The current run, if
+  /// any, is ended first.
+  /// Calling this will trigger a copy of the internal state including all
+  /// reported diagnostics, resulting in an expensive call.
+  json::Object createDocument();
+
+private:
+  /// Language options to use for the current SARIF document; null when the
+  /// writer was default-constructed.
+  const LangOptions *LangOpts = nullptr;
+
+  /// A sequence of SARIF runs
+  /// A run object describes a single run of an analysis tool and contains the
+  /// output of that run
+  ///
+  /// Reference: run
+  /// object
+  json::Array Runs;
+
+  /// The list of rules associated with the most recent active run. These are
+  /// defined using the diagnostics passed to the SarifDocument. Each rule
+  /// need not be unique through the result set. E.g. there may be several
+  /// 'syntax' errors throughout code under analysis, each of which has its
+  /// own specific diagnostic message (and consequently, RuleId). Rules are
+  /// also known as "reportingDescriptor" objects in SARIF
+  ///
+  /// Reference: rules
+  /// property
+  SmallVector<SarifRule, 32> CurrentRules;
+
+  /// The list of artifacts that have been encountered on the most recent active
+  /// run. An artifact is defined in SARIF as a sequence of bytes addressable
+  /// by a URI. A common example for clang's case would be files named by
+  /// filesystem paths.
+  StringMap<detail::SarifArtifact> CurrentArtifacts;
+};
+} // namespace clang
+
+#endif // CLANG_BASIC_SARIF_H
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -438,7 +438,7 @@
/// Comparison function class, useful for sorting FullSourceLocs.
struct BeforeThanCompare {
- bool operator()(const FullSourceLoc& lhs, const FullSourceLoc& rhs) const {
+ bool operator()(const FullSourceLoc &lhs, const FullSourceLoc &rhs) const {
return lhs.isBeforeInTranslationUnitThan(rhs);
}
};
@@ -448,18 +448,52 @@
/// This is useful for debugging.
void dump() const;
- friend bool
- operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
+ friend bool operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
return LHS.getRawEncoding() == RHS.getRawEncoding() &&
- LHS.SrcMgr == RHS.SrcMgr;
+ LHS.SrcMgr == RHS.SrcMgr;
}
- friend bool
- operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
+ friend bool operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
return !(LHS == RHS);
}
};
+/// A pair of FullSourceLoc objects
+///
+/// Useful for passing to methods that expect SourceRanges and SourceManagers
+/// together.
+class FullSourceRange {
+  FullSourceLoc B;
+  FullSourceLoc E;
+
+public:
+  /// Create a range whose begin and end are default-constructed locations.
+  FullSourceRange() = default;
+
+  /// Create the range that starts at \p Begin and ends at \p End.
+  FullSourceRange(FullSourceLoc Begin, FullSourceLoc End) : B(Begin), E(End) {}
+
+  /// The location at which the range starts.
+  const FullSourceLoc &getBegin() const { return B; }
+
+  /// The location at which the range ends.
+  const FullSourceLoc &getEnd() const { return E; }
+
+  /// A range is invalid as soon as either of its ends is invalid.
+  bool isInvalid() const { return B.isInvalid() || E.isInvalid(); }
+  bool isValid() const { return !isInvalid(); }
+
+  /// Two ranges are equal when both their begins and their ends are equal.
+  bool operator==(const FullSourceRange &X) const {
+    return B == X.B && E == X.E;
+  }
+
+  bool operator!=(const FullSourceRange &X) const { return !(*this == X); }
+
+  // Returns true iff other is wholly contained within this range.
+  bool fullyContains(const FullSourceRange &other) const {
+    return other.B >= B && other.E <= E;
+  }
+
+  /// Write a textual form of the range to \p OS.
+  void print(raw_ostream &OS) const;
+
+  /// Return the text that \ref print writes, as a string.
+  std::string printToString() const;
+
+  /// Print the range, followed by a newline, to the error stream.
+  void dump() const;
+};
+
} // namespace clang
namespace llvm {
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -62,6 +62,7 @@
NoSanitizeList.cpp
SanitizerSpecialCaseList.cpp
Sanitizers.cpp
+ Sarif.cpp
SourceLocation.cpp
SourceManager.cpp
Stack.cpp
diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/Basic/Sarif.cpp
@@ -0,0 +1,361 @@
+#include "clang/Basic/Sarif.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Version.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+
+#include
+#include
+#include
+
+using namespace clang;
+using namespace llvm;
+
+using clang::detail::SarifArtifact;
+using clang::detail::SarifArtifactLocation;
+
+namespace {
+
+/// Return the real path name of \p FE when one is available, and the name
+/// of the entry otherwise.
+StringRef getFileName(const FileEntry &FE) {
+  const StringRef RealPath = FE.tryGetRealPathName();
+  return RealPath.empty() ? FE.getName() : RealPath;
+}
+/// \name URI
+/// @{
+
+/// \internal
+/// \brief
+/// Return the RFC3986 encoding of the input character
+///
+/// \param C Character to encode to RFC3986
+///
+/// \return \c C itself when it may appear unescaped in a URI path, and its
+/// percent-encoded form otherwise
+std::string percentEncodeURICharacter(char C) {
+  // Per RFC 3986, alphanumeric characters and this handful of punctuation
+  // characters are not reserved for the path component, so they are written
+  // out as they are. Everything else is percent encoded.
+  const StringRef WrittenDirectly("-._~:@!$&'()*+,;=");
+  const bool NeedsNoEscaping =
+      llvm::isAlnum(C) || WrittenDirectly.find(C) != StringRef::npos;
+  if (!NeedsNoEscaping)
+    return "%" + llvm::toHex(StringRef(&C, 1));
+  return std::string(&C, 1);
+}
+
+/// \internal
+/// \brief Return a URI representing the given file name
+///
+/// \param Filename Path to convert. It is expected to contain at least one
+/// path component.
+///
+/// \return RFC3986 URI representing the input file name
+std::string fileNameToURI(StringRef Filename) {
+  llvm::SmallString<32> Ret = StringRef("file://");
+
+  // Get the root name to see if it has a URI authority.
+  StringRef Root = sys::path::root_name(Filename);
+  if (Root.startswith("//")) {
+    // There is an authority, so add it to the URI.
+    Ret += Root.drop_front(2).str();
+  } else if (!Root.empty()) {
+    // There is no authority, so end the component and add the root to the URI.
+    Ret += Twine("/" + Root).str();
+  }
+
+  auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
+  assert(Iter != End && "Expected there to be a non-root path component.");
+
+  // A root name, when present, is the first path component and was handled
+  // above. Without a root name the first component is part of the path and
+  // must be kept.
+  if (!Root.empty() && Iter != End)
+    ++Iter;
+
+  // Add the rest of the path components, encoding any reserved characters.
+  for (; Iter != End; ++Iter) {
+    StringRef Component = *Iter;
+
+    // A component that is a lone separator is the root directory that
+    // follows the root name or starts an absolute path. The "/" written
+    // before each part already stands for it, so it is not a URI path part.
+    if (Component == "/" || Component == "\\")
+      continue;
+
+    // Add the separator between the previous path part and the one being
+    // currently processed.
+    Ret += "/";
+
+    // URI encode the part.
+    for (char C : Component)
+      Ret += percentEncodeURICharacter(C);
+  }
+
+  return std::string(Ret);
+}
+/// @}
+
+/// \brief Calculate the column position expressed in the number of UTF-8 code
+/// points from column start to the source location
+///
+/// \param Loc The source location whose column needs to be calculated
+/// \param TokenLen Optional hint for when the token is multiple bytes long
+///
+/// \return The 1-based column of the effective source location, counting
+/// each UTF-8 encoded character once regardless of how many bytes it uses
+unsigned int adjustColumnPos(FullSourceLoc Loc, unsigned int TokenLen = 0) {
+  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
+
+  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+  // File offsets start at 0 while column numbers start at 1, so a location on
+  // the first line of a file has an offset of exactly (column - 1). Anything
+  // smaller than that would place the line start before the buffer start.
+  assert(LocInfo.second + 1 >= Loc.getExpansionColumnNumber() &&
+         "position in file is before column number?");
+
+  Optional<MemoryBufferRef> Buf =
+      Loc.getManager().getBufferOrNone(LocInfo.first);
+  assert(Buf && "got an invalid buffer for the location's file");
+  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
+         "token extends past end of buffer?");
+
+  // Adjust the offset to be the start of the line, since we'll be counting
+  // Unicode characters from there until our column offset.
+  unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
+  unsigned int Ret = 1;
+  while (Off < (LocInfo.second + TokenLen)) {
+    // Step over one whole UTF-8 sequence per counted column.
+    Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
+    Ret++;
+  }
+
+  return Ret;
+}
+
+/// \name SARIF Utilities
+/// @{
+
+/// \internal
+/// Build a SARIF message object whose "text" property is a copy of \p Text.
+json::Object createMessage(StringRef Text) {
+  json::Object Message;
+  Message["text"] = Text.str();
+  return Message;
+}
+
+/// \internal
+/// Build a SARIF region object describing the source range \p R.
+///
+/// The start line and column are always present. When the range begins and
+/// ends at the same location only an end column is added; otherwise the end
+/// line is added as well, and the end column accounts for the length of the
+/// token found at the end of the range, measured using \p LO.
+json::Object createTextRegion(const LangOptions &LO, const FullSourceRange &R) {
+  const FullSourceLoc &Begin = R.getBegin();
+  const FullSourceLoc &End = R.getEnd();
+
+  json::Object Region{{"startLine", Begin.getExpansionLineNumber()},
+                      {"startColumn", adjustColumnPos(Begin)}};
+  if (Begin != End) {
+    const unsigned TokenLength = Lexer::MeasureTokenLength(
+        End.getLocWithOffset(0), End.getManager(), LO);
+    Region["endLine"] = End.getExpansionLineNumber();
+    Region["endColumn"] = adjustColumnPos(End, TokenLength);
+  } else {
+    Region["endColumn"] = adjustColumnPos(Begin);
+  }
+  return Region;
+}
+
+/// Build a SARIF location object around \p PhysicalLocation. A "message"
+/// property is added only when \p Message is not empty.
+json::Object createLocation(json::Object &&PhysicalLocation,
+                            StringRef Message = "") {
+  json::Object Location;
+  Location["physicalLocation"] = std::move(PhysicalLocation);
+  if (Message.empty())
+    return Location;
+  Location.insert({"message", createMessage(Message)});
+  return Location;
+}
+
+/// Build a SARIF threadFlowLocation object.
+///
+/// \param Location The location object for this step of the thread flow. A
+/// threadFlowLocation holds a single location, so it is stored under the
+/// "location" property.
+/// \param Importance Importance of the step. It is copied into the result
+/// and left out entirely when empty.
+json::Object createThreadFlowLocation(json::Object &&Location,
+                                      const StringRef &Importance) {
+  json::Object Ret{{"location", std::move(Location)}};
+  // Store an owned copy: a json::Value built from a StringRef only refers to
+  // the caller's string.
+  if (!Importance.empty())
+    Ret["importance"] = Importance.str();
+  return Ret;
+}
+/// @}
+
+} // namespace
+
+/// Create a SARIF physicalLocation object for \p R, registering the file that
+/// contains the beginning of the range as an artifact of the current run if
+/// it has not been seen before.
+///
+/// \pre \p R is valid, lies within a file, and the writer was constructed
+/// with language options.
+json::Object
+SarifDocumentWriter::createPhysicalLocation(const FullSourceRange &R) {
+  assert(R.isValid() &&
+         "Cannot create a physicalLocation from invalid SourceRange!");
+  assert(LangOpts != nullptr &&
+         "Language options are needed to create a physicalLocation!");
+  const FileEntry *FE = R.getBegin().getExpansionLoc().getFileEntry();
+  assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
+
+  const std::string FileURI = fileNameToURI(getFileName(*FE));
+  auto I = CurrentArtifacts.find(FileURI);
+
+  if (I == CurrentArtifacts.end()) {
+    const uint32_t NewIdx = static_cast<uint32_t>(CurrentArtifacts.size());
+    // Copy the result of the setter chain into a value. The setters return a
+    // reference to the temporary made by create(), which does not survive the
+    // end of the statement.
+    const SarifArtifact Artifact =
+        SarifArtifact::create(
+            SarifArtifactLocation::create(FileURI).setIndex(NewIdx))
+            .setRoles({"resultFile"})
+            .setLength(FE->getSize())
+            .setMimeType("text/plain");
+    I = CurrentArtifacts.insert({FileURI, Artifact}).first;
+    // The artifact only stores a view of its URI. Re-point that view at the
+    // key owned by the map entry, which lives as long as the artifact does,
+    // instead of at the local string above.
+    I->second.Location.URI = I->getKey();
+  }
+  assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
+  const uint32_t Idx = I->second.Location.Index.getValue();
+  // A physicalLocation refers to its artifact through "artifactLocation".
+  return json::Object{
+      {"artifactLocation", json::Object{{"uri", FileURI}, {"index", Idx}}},
+      {"region", createTextRegion(*LangOpts, R)}};
+}
+
+/// Return the "tool" object of the most recently created run. Asserts that a
+/// run exists.
+json::Object *SarifDocumentWriter::getCurrentTool() {
+  assert(hasRun() && "Need to call createRun() before using getcurrentTool!");
+  json::Object *LastRun = Runs.back().getAsObject();
+  json::Value *Tool = LastRun->get("tool");
+  return Tool->getAsObject();
+}
+
+/// Forget the artifacts and rules collected so far.
+void SarifDocumentWriter::reset() {
+  CurrentArtifacts.clear();
+  CurrentRules.clear();
+}
+
+/// End the current run, if there is one: write the collected rules and
+/// artifacts into the run, then clear them.
+///
+/// Calling this again for a run that was already ended leaves the run as it
+/// is. All strings are copied into the document, so the document does not
+/// depend on the lifetime of the strings the rules and artifacts refer to.
+void SarifDocumentWriter::endRun() {
+  if (!hasRun())
+    return;
+
+  json::Object *Driver = getCurrentTool()->getObject("driver");
+  assert(Driver != nullptr && "The tool of a run must have a driver");
+
+  // createRun() never adds "rules", so finding it means this run was already
+  // flushed. Flushing again would replace its rules with an empty list.
+  if (Driver->get("rules") != nullptr) {
+    reset();
+    return;
+  }
+
+  // Flush all the rules. SARIF keeps them on the tool's driver, identifies
+  // each one by "id", and expects the description as a message object.
+  json::Array Rules;
+  for (const SarifRule &R : CurrentRules) {
+    json::Object Rule{
+        {"name", R.Name.str()},
+        {"id", R.RuleId.str()},
+        {"fullDescription", json::Object{{"text", R.Description.str()}}}};
+    if (!R.HelpURI.empty())
+      Rule["helpUri"] = R.HelpURI.str();
+    Rules.emplace_back(std::move(Rule));
+  }
+  (*Driver)["rules"] = std::move(Rules);
+
+  // Flush all the artifacts. Locations refer to an artifact by its index, so
+  // the artifacts are written in index order rather than in the iteration
+  // order of the map.
+  SmallVector<std::pair<uint32_t, const StringMapEntry<SarifArtifact> *>, 8>
+      Ordered;
+  for (const auto &Entry : CurrentArtifacts)
+    Ordered.emplace_back(
+        Entry.getValue().Location.Index.getValueOr(~uint32_t(0)), &Entry);
+  llvm::sort(Ordered, llvm::less_first());
+
+  json::Array *Artifacts = currentRun()->getArray("artifacts");
+  for (const auto &IndexedEntry : Ordered) {
+    const StringMapEntry<SarifArtifact> &Entry = *IndexedEntry.second;
+    const SarifArtifact &A = Entry.getValue();
+
+    // The key of the map entry is the URI of the artifact. It is copied
+    // because the map is cleared below.
+    json::Object Loc{{"uri", Entry.getKey().str()}};
+    if (A.Location.Index.hasValue())
+      Loc["index"] = static_cast<int64_t>(A.Location.Index.getValue());
+
+    json::Object Artifact;
+    Artifact["location"] = std::move(Loc);
+    if (A.Length.hasValue())
+      Artifact["length"] = static_cast<int64_t>(A.Length.getValue());
+    if (!A.Roles.empty()) {
+      json::Array Roles;
+      for (StringRef Role : A.Roles)
+        Roles.emplace_back(Role.str());
+      Artifact["roles"] = std::move(Roles);
+    }
+    if (!A.MimeType.empty())
+      Artifact["mimeType"] = A.MimeType.str();
+    if (A.Offset.hasValue())
+      Artifact["offset"] = static_cast<int64_t>(A.Offset.getValue());
+    Artifacts->push_back(json::Value(std::move(Artifact)));
+  }
+
+  // Clear, reset temporaries before new run
+  reset();
+}
+
+/// Build an array holding one threadFlow object whose "locations" contain
+/// one entry for every element of \p ThreadFlows, in order.
+json::Array SarifDocumentWriter::createThreadFlows(
+    const ArrayRef<ThreadFlow> &ThreadFlows) {
+  json::Array Steps;
+  for (const ThreadFlow &Flow : ThreadFlows) {
+    json::Object Location =
+        createLocation(createPhysicalLocation(Flow.Range), Flow.Message);
+    Steps.emplace_back(
+        createThreadFlowLocation(std::move(Location), Flow.Importance));
+  }
+
+  json::Object FlowObject{{"locations", std::move(Steps)}};
+  return json::Array{std::move(FlowObject)};
+}
+
+/// Build a codeFlow object whose "threadFlows" are created from
+/// \p ThreadFlows.
+json::Object
+SarifDocumentWriter::createCodeFlow(const ArrayRef<ThreadFlow> &ThreadFlows) {
+  json::Object CodeFlow;
+  CodeFlow["threadFlows"] = createThreadFlows(ThreadFlows);
+  return CodeFlow;
+}
+
+/// Start a new run for the tool with the given names, ending the previous
+/// run first if there is one.
+///
+/// \param ShortToolName Name of the tool; also used to build its version
+/// string.
+/// \param LongToolName Full name of the tool.
+///
+/// Both names are copied into the document.
+void SarifDocumentWriter::createRun(const StringRef &ShortToolName,
+                                    const StringRef &LongToolName) {
+  // Clear resources associated with a previous run
+  endRun();
+
+  // The names are stored as owned strings: a json::Value built from a
+  // StringRef only refers to the caller's string, which may not outlive the
+  // document.
+  json::Object Driver{{"name", ShortToolName.str()},
+                      {"fullName", LongToolName.str()},
+                      {"language", "en-US"},
+                      {"version", getClangToolFullVersion(ShortToolName)}};
+  json::Object Tool{{"driver", std::move(Driver)}};
+  json::Object NewRun{{"tool", std::move(Tool)},
+                      {"results", {}},
+                      {"artifacts", {}},
+                      {"columnKind", "unicodeCodePoints"}};
+  Runs.emplace_back(std::move(NewRun));
+}
+
+/// Whether at least one run has been created.
+bool SarifDocumentWriter::hasRun() const { return !Runs.empty(); }
+
+/// Return the most recently created run. Asserts that a run exists.
+json::Object *SarifDocumentWriter::currentRun() {
+  assert(hasRun() && "SARIF Document has no runs, create a run first!");
+  json::Value &LastRun = Runs.back();
+  return LastRun.getAsObject();
+}
+
+/// Build a rule from its parts and add it to the rules collected for the
+/// current run.
+///
+/// \return The index of the new rule among the collected rules.
+size_t SarifDocumentWriter::createRule(const StringRef &Name,
+                                       const StringRef &RuleId,
+                                       const StringRef &Description,
+                                       const StringRef &HelpURI) {
+  return createRule(SarifRule::create()
+                        .setName(Name)
+                        .setRuleId(RuleId)
+                        .setDescription(Description)
+                        .setHelpURI(HelpURI));
+}
+
+/// Add a copy of \p Rule to the rules collected for the current run.
+///
+/// \return The index of the new rule among the collected rules.
+size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
+  CurrentRules.push_back(Rule);
+  return CurrentRules.size() - 1;
+}
+
+/// Append \p Result to the results of the current run.
+///
+/// \param RuleIdx Index of the rule the result belongs to, as returned by
+/// createRule. The rule's identifier is copied into the result.
+/// \param Result The result to append. Its locations and thread flows are
+/// only emitted when they are not empty.
+///
+/// \pre A rule with index \p RuleIdx exists and a run has been created.
+void SarifDocumentWriter::appendResult(size_t RuleIdx,
+                                       const SarifResult &Result) {
+  assert(RuleIdx < CurrentRules.size() &&
+         "Trying to reference a rule that doesn't exist");
+  // The rule id is stored as an owned string: a json::Value built from a
+  // StringRef only refers to the caller's string.
+  json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
+                   {"ruleIndex", static_cast<int64_t>(RuleIdx)},
+                   {"ruleId", CurrentRules[RuleIdx].RuleId.str()}};
+  if (!Result.Locations.empty()) {
+    json::Array Locs;
+    for (const FullSourceRange &Range : Result.Locations)
+      Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
+    Ret["locations"] = std::move(Locs);
+  }
+  if (!Result.ThreadFlows.empty())
+    Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
+
+  json::Array *Results = currentRun()->getArray("results");
+  Results->emplace_back(std::move(Ret));
+}
+
+/// End the current run, then return a document made of the schema, the
+/// version and, when there is at least one, a copy of the runs.
+json::Object SarifDocumentWriter::createDocument() {
+  // Flush all temporaries to their destinations if needed
+  endRun();
+
+  json::Object Document;
+  Document["$schema"] = SchemaURI;
+  Document["version"] = SchemaVersion;
+  if (!Runs.empty())
+    Document["runs"] = json::Array(Runs);
+  return Document;
+}
diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp
--- a/clang/lib/Basic/SourceLocation.cpp
+++ b/clang/lib/Basic/SourceLocation.cpp
@@ -270,3 +270,30 @@
std::pair FullSourceLoc::getDecomposedLoc() const {
return SrcMgr->getDecomposedLoc(*this);
}
+
+//===----------------------------------------------------------------------===//
+// FullSourceRange
+//===----------------------------------------------------------------------===//
+
+/// Write the range to \p OS between angle brackets. The end of the range is
+/// only written when it differs from the beginning.
+void FullSourceRange::print(raw_ostream &OS) const {
+  OS << '<';
+  auto Previous = PrintDifference(OS, B.getManager(), B, {});
+  const bool IsSingleLocation = (B == E);
+  if (!IsSingleLocation) {
+    OS << ", ";
+    PrintDifference(OS, E.getManager(), E, Previous);
+  }
+  OS << '>';
+}
+
+/// Return the text written by print() as a string.
+LLVM_DUMP_METHOD std::string FullSourceRange::printToString() const {
+  std::string Buffer;
+  llvm::raw_string_ostream Stream(Buffer);
+  print(Stream);
+  Stream.flush();
+  return Buffer;
+}
+
+LLVM_DUMP_METHOD void FullSourceRange::dump() const {
+ this->print(llvm::errs());
+ llvm::errs() << '\n';
+}
diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -10,6 +10,7 @@
FileManagerTest.cpp
LineOffsetMappingTest.cpp
SanitizersTest.cpp
+ SarifTest.cpp
SourceManagerTest.cpp
)
diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp
new file mode 100644
--- /dev/null
+++ b/clang/unittests/Basic/SarifTest.cpp
@@ -0,0 +1,153 @@
+//===- unittests/Basic/SarifTest.cpp - Test writing SARIF documents -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Sarif.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/JSON.h"
+#include
+
+#include "gmock/gmock.h"
+#include "gtest/gtest-death-test.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+using namespace llvm;
+
+namespace {
+
+// Test that a document without runs only has a schema and a version
+TEST(SarifDocumentWriterTest, createEmptyDocument) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+
+  // WHEN:
+  const json::Object &emptyDocument = writer.createDocument();
+  // Collect owned copies of the keys, visiting the entries by reference so
+  // that no view of a temporary key is kept.
+  std::vector<std::string> keys;
+  keys.reserve(emptyDocument.size());
+  for (const auto &item : emptyDocument)
+    keys.push_back(item.getFirst().str());
+
+  // THEN:
+  ASSERT_THAT(keys, testing::UnorderedElementsAre("$schema", "version"));
+}
+
+// Test that a newly inserted run will associate correct tool names
+TEST(SarifDocumentWriterTest, documentWithARun) {
+  // GIVEN: a writer and the names of the tool producing the run
+  SarifDocumentWriter writer;
+  const char *shortName = "sariftest";
+  const char *longName = "sarif writer test";
+
+  // WHEN: a run is created and ended before the document is requested
+  writer.createRun(shortName, longName);
+  writer.endRun();
+  const json::Object &document = writer.createDocument();
+  const json::Array *runs = document.getArray("runs");
+
+  // THEN: the document has exactly one run
+  ASSERT_THAT(runs, testing::NotNull());
+  ASSERT_EQ(runs->size(), 1UL);
+
+  // ... whose tool driver is the tool given to createRun
+  const json::Object *firstRun = runs->begin()->getAsObject();
+  const json::Object *tool = firstRun->getObject("tool");
+  const json::Object *driver = tool->getObject("driver");
+  ASSERT_THAT(driver, testing::NotNull());
+
+  const auto name = driver->getString("name");
+  const auto fullName = driver->getString("fullName");
+  const auto language = driver->getString("language");
+  ASSERT_TRUE(name.hasValue());
+  ASSERT_TRUE(fullName.hasValue());
+  ASSERT_TRUE(language.hasValue());
+
+  EXPECT_EQ(name.getValue(), shortName);
+  EXPECT_EQ(fullName.getValue(), longName);
+  EXPECT_EQ(language.getValue(), "en-US");
+}
+
+// Test adding result without a run causes a crash
+TEST(SarifDocumentWriterTest, addingResultsWillCrashIfThereIsNoRun) {
+ // GIVEN:
+ SarifDocumentWriter writer;
+ SarifResult &&emptyResult = SarifResult::create();
+
+ // WHEN:
+ // A SarifDocumentWriter::createRun(...) was not called prior to
+ // SarifDocumentWriter::appendResult(...)
+ // But a rule exists
+ auto ruleIdx = writer.createRule(SarifRule::create());
+
+ // THEN:
+ ASSERT_DEATH({ writer.appendResult(ruleIdx, emptyResult); },
+ ".*create a run first.*");
+}
+
+// Test adding result for invalid ruleIdx causes a crash
+TEST(SarifDocumentWriterTest, addingResultsWithoutRuleWillCrash) {
+ // GIVEN:
+ SarifDocumentWriter writer;
+ SarifResult &&emptyResult = SarifResult::create();
+
+ // WHEN:
+ writer.createRun("sarif test", "sarif test runner");
+ // But caller forgot to create a rule for this run:
+
+ // THEN:
+ ASSERT_DEATH({ writer.appendResult(0, emptyResult); },
+ "Trying to reference a rule that doesn't exist");
+}
+
+// Test adding rule and result shows up in the final document
+TEST(SarifDocumentWriterTest, addResultWIthValidRuleIsOk) {
+  // GIVEN:
+  SarifDocumentWriter writer;
+  const SarifResult &result = SarifResult::create();
+  // The rule is stored by value: the setters return a reference to the
+  // temporary made by create(), which is gone once the statement ends.
+  const SarifRule rule =
+      SarifRule::create()
+          .setRuleId("clang.unittest")
+          .setDescription("Example rule created during unit tests")
+          .setName("clang unit test");
+
+  // WHEN:
+  writer.createRun("sarif test", "sarif test runner");
+  const size_t ruleIdx = writer.createRule(rule);
+  writer.appendResult(ruleIdx, result);
+  const json::Object &document = writer.createDocument();
+
+  // THEN:
+  // A document with a valid schema and version exists
+  ASSERT_THAT(document.get("$schema"), ::testing::NotNull());
+  ASSERT_THAT(document.get("version"), ::testing::NotNull());
+  const json::Array *runs = document.getArray("runs");
+
+  // A run exists on this document
+  ASSERT_THAT(runs, ::testing::NotNull());
+  ASSERT_EQ(runs->size(), 1UL);
+  const json::Object *theRun = runs->back().getAsObject();
+
+  // The run has slots for tools, results, rules and artifacts
+  ASSERT_THAT(theRun->get("tool"), ::testing::NotNull());
+  ASSERT_THAT(theRun->get("results"), ::testing::NotNull());
+  ASSERT_THAT(theRun->get("artifacts"), ::testing::NotNull());
+  const json::Object *driver = theRun->getObject("tool")->getObject("driver");
+  const json::Array *results = theRun->getArray("results");
+  const json::Array *artifacts = theRun->getArray("artifacts");
+
+  // The tool is as expected
+  ASSERT_TRUE(driver->getString("name").hasValue());
+  ASSERT_TRUE(driver->getString("fullName").hasValue());
+
+  EXPECT_EQ(driver->getString("name").getValue(), "sarif test");
+  EXPECT_EQ(driver->getString("fullName").getValue(), "sarif test runner");
+
+  // The results are as expected
+  EXPECT_EQ(results->size(), 1UL);
+
+  // The artifacts are as expected
+  EXPECT_TRUE(artifacts->empty());
+}
+
+} // namespace