diff --git a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp --- a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp @@ -17,6 +17,7 @@ #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Path.h" @@ -27,10 +28,12 @@ namespace { class SarifDiagnostics : public PathDiagnosticConsumer { std::string OutputFile; + const LangOptions &LO; public: - SarifDiagnostics(AnalyzerOptions &, const std::string &Output) - : OutputFile(Output) {} + SarifDiagnostics(AnalyzerOptions &, const std::string &Output, + const LangOptions &LO) + : OutputFile(Output), LO(LO) {} ~SarifDiagnostics() override = default; void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, @@ -45,9 +48,9 @@ void ento::createSarifDiagnosticConsumer( AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C, - const std::string &Output, const Preprocessor &, + const std::string &Output, const Preprocessor &PP, const cross_tu::CrossTranslationUnitContext &) { - C.push_back(new SarifDiagnostics(AnalyzerOpts, Output)); + C.push_back(new SarifDiagnostics(AnalyzerOpts, Output, PP.getLangOpts())); } static StringRef getFileName(const FileEntry &FE) { @@ -142,26 +145,56 @@ return json::Object{{"uri", FileURI}, {"index", Index}}; } -static json::Object createTextRegion(SourceRange R, const SourceManager &SM) { +static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc, + unsigned int TokenLen = 0) { + assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); + + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc); + assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) && + "position in file is before column number?"); + + bool InvalidBuffer = false; + const MemoryBuffer *Buf = SM.getBuffer(LocInfo.first, 
&InvalidBuffer); + assert(!InvalidBuffer && "got an invalid buffer for the location's file"); + assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && + "token extends past end of buffer?"); + + // Adjust the offset to be the start of the line, since we'll be counting + // Unicode characters from there until our column offset. + unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1); + unsigned int Ret = 1; + while (Off < (LocInfo.second + TokenLen)) { + Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); + Ret++; + } + + return Ret; +} + +static json::Object createTextRegion(const LangOptions &LO, SourceRange R, + const SourceManager &SM) { json::Object Region{ {"startLine", SM.getExpansionLineNumber(R.getBegin())}, - {"startColumn", SM.getExpansionColumnNumber(R.getBegin())}, + {"startColumn", adjustColumnPos(SM, R.getBegin())}, }; if (R.getBegin() == R.getEnd()) { - Region["endColumn"] = SM.getExpansionColumnNumber(R.getBegin()); + Region["endColumn"] = adjustColumnPos(SM, R.getBegin()); } else { Region["endLine"] = SM.getExpansionLineNumber(R.getEnd()); - Region["endColumn"] = SM.getExpansionColumnNumber(R.getEnd()) + 1; + Region["endColumn"] = adjustColumnPos( + SM, R.getEnd(), + Lexer::MeasureTokenLength(R.getEnd(), SM, LO)); } return Region; } -static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE, +static json::Object createPhysicalLocation(const LangOptions &LO, + SourceRange R, const FileEntry &FE, const SourceManager &SMgr, json::Array &Artifacts) { return json::Object{ {{"artifactLocation", createArtifactLocation(FE, Artifacts)}, - {"region", createTextRegion(R, SMgr)}}}; + {"region", createTextRegion(LO, R, SMgr)}}}; } enum class Importance { Important, Essential, Unimportant }; @@ -213,7 +246,8 @@ return Importance::Unimportant; } -static json::Object createThreadFlow(const PathPieces &Pieces, +static json::Object createThreadFlow(const LangOptions &LO, + const PathPieces &Pieces, json::Array 
&Artifacts) { const SourceManager &SMgr = Pieces.front()->getLocation().getManager(); json::Array Locations; @@ -221,7 +255,7 @@ const PathDiagnosticLocation &P = Piece->getLocation(); Locations.push_back(createThreadFlowLocation( createLocation(createPhysicalLocation( - P.asRange(), + LO, P.asRange(), *P.asLocation().getExpansionLoc().getFileEntry(), SMgr, Artifacts), Piece->getString()), @@ -230,13 +264,15 @@ return json::Object{{"locations", std::move(Locations)}}; } -static json::Object createCodeFlow(const PathPieces &Pieces, +static json::Object createCodeFlow(const LangOptions &LO, + const PathPieces &Pieces, json::Array &Artifacts) { return json::Object{ - {"threadFlows", json::Array{createThreadFlow(Pieces, Artifacts)}}}; + {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}}; } -static json::Object createResult(const PathDiagnostic &Diag, +static json::Object createResult(const LangOptions &LO, + const PathDiagnostic &Diag, json::Array &Artifacts, const StringMap<unsigned> &RuleMapping) { const PathPieces &Path = Diag.path.flatten(false); @@ -247,10 +283,10 @@ return json::Object{ {"message", createMessage(Diag.getVerboseDescription())}, - {"codeFlows", json::Array{createCodeFlow(Path, Artifacts)}}, + {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}}, {"locations", json::Array{createLocation(createPhysicalLocation( - Diag.getLocation().asRange(), + LO, Diag.getLocation().asRange(), *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(), SMgr, Artifacts))}}, {"ruleIndex", Iter->getValue()}, @@ -320,18 +356,20 @@ {"rules", createRules(Diags, RuleMapping)}}}}; } -static json::Object createRun(std::vector<const PathDiagnostic *> &Diags) { +static json::Object createRun(const LangOptions &LO, + std::vector<const PathDiagnostic *> &Diags) { json::Array Results, Artifacts; StringMap<unsigned> RuleMapping; json::Object Tool = createTool(Diags, RuleMapping); llvm::for_each(Diags, [&](const PathDiagnostic *D) { - Results.push_back(createResult(*D, Artifacts, RuleMapping)); + 
Results.push_back(createResult(LO, *D, Artifacts, RuleMapping)); }); return json::Object{{"tool", std::move(Tool)}, {"results", std::move(Results)}, - {"artifacts", std::move(Artifacts)}}; + {"artifacts", std::move(Artifacts)}, + {"columnKind", "unicodeCodePoints"}}; } void SarifDiagnostics::FlushDiagnosticsImpl( @@ -351,6 +389,6 @@ {"$schema", "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"}, {"version", "2.1.0"}, - {"runs", json::Array{createRun(Diags)}}}; + {"runs", json::Array{createRun(LO, Diags)}}}; OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif))); } diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif --- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif +++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif @@ -13,6 +13,7 @@ ] } ], + "columnKind": "unicodeCodePoints", "results": [ { "codeFlows": [ diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif --- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif +++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif @@ -4,7 +4,7 @@ { "artifacts": [ { - "length": 951, + "length": 1077, "location": { }, "mimeType": "text/plain", @@ -13,6 +13,7 @@ ] } ], + "columnKind": "unicodeCodePoints", "results": [ { "codeFlows": [ @@ -32,9 +33,9 @@ }, "region": { "endColumn": 6, - "endLine": 34, + "endLine": 39, "startColumn": 3, - "startLine": 34 + "startLine": 39 } } } @@ -102,9 +103,9 @@ }, "region": { "endColumn": 6, - "endLine": 35, + "endLine": 40, "startColumn": 3, - "startLine": 35 + "startLine": 40 } } } @@ -120,7 
+121,7 @@ "index": 0, }, "region": { - "endColumn": 11, + "endColumn": 12, "endLine": 15, "startColumn": 3, "startLine": 15 @@ -363,6 +364,74 @@ }, "ruleId": "unix.Malloc", "ruleIndex": 3 + }, + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "importance": "essential", + "location": { + "message": { + "text": "'løçål' initialized to 0" + }, + "physicalLocation": { + "artifactLocation": { + "index": 0, + }, + "region": { + "endColumn": 12, + "endLine": 34, + "startColumn": 3, + "startLine": 34 + } + } + } + }, + { + "importance": "essential", + "location": { + "message": { + "text": "Division by zero" + }, + "physicalLocation": { + "artifactLocation": { + "index": 0, + }, + "region": { + "endColumn": 20, + "startColumn": 20, + "startLine": 35 + } + } + } + } + ] + } + ] + } + ], + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "index": 0, + }, + "region": { + "endColumn": 20, + "startColumn": 20, + "startLine": 35 + } + } + } + ], + "message": { + "text": "Division by zero" + }, + "ruleId": "core.DivideZero", + "ruleIndex": 2 } ], "tool": { diff --git a/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c b/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c --- a/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c +++ b/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c @@ -30,11 +30,17 @@ return 0; } +int unicode() { + int løçål = 0; + /* ☃ */ return 1 / løçål; // expected-warning {{Division by zero}} +} + int main(void) { f(); g(); h(0); leak(0); + unicode(); return 0; }