diff --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h --- a/clang/include/clang/Analysis/FlowSensitive/Logger.h +++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h @@ -31,6 +31,10 @@ /// A logger that simply writes messages to the specified ostream in real /// time. static std::unique_ptr textual(llvm::raw_ostream &); + /// A logger that builds an HTML UI to inspect the analysis results. + /// Each function's analysis is written to a stream obtained from the factory. + static std::unique_ptr + html(std::function()>); virtual ~Logger() = default; diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ ControlFlowContext.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp + HTMLLogger.cpp Logger.cpp Transfer.cpp TypeErasedDataflowAnalysis.cpp @@ -16,3 +17,14 @@ ) add_subdirectory(Models) + +add_custom_command(OUTPUT HTMLLogger.inc + COMMAND "${Python3_EXECUTABLE}" ${CLANG_SOURCE_DIR}/utils/bundle_resources.py + ${CMAKE_CURRENT_BINARY_DIR}/HTMLLogger.inc + HTMLLogger.html HTMLLogger.css HTMLLogger.js + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Bundling HTMLLogger resources" + DEPENDS ${CLANG_SOURCE_DIR}/utils/bundle_resources.py HTMLLogger.html HTMLLogger.css HTMLLogger.js + VERBATIM) +add_custom_target(clangAnalysisFlowSensitiveResources DEPENDS HTMLLogger.inc) +add_dependencies(clangAnalysisFlowSensitive clangAnalysisFlowSensitiveResources) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -20,14 +20,17 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include #include #include -static llvm::cl::opt - DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, - llvm::cl::desc("Emit log of dataflow analysis. With no arg, " - "writes textual log to stderr.")); +static llvm::cl::opt DataflowLog( + "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " + "log to stderr. With an arg, writes HTML logs under the " + "specified directory (one per analyzed function).")); namespace clang { namespace dataflow { @@ -376,6 +379,34 @@ return nullptr; } +static std::unique_ptr makeLoggerFromCommandLine() { + if (DataflowLog.empty()) + return Logger::textual(llvm::errs()); + + llvm::StringRef Dir = DataflowLog; + if (auto EC = llvm::sys::fs::create_directories(Dir)) + llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; + // All analysis runs within a process will log to the same directory. + // Share a counter so they don't all overwrite each other's 0.html. + // (Don't share a logger, it's not threadsafe). + static std::atomic Counter = {0}; + auto StreamFactory = + [Dir(Dir.str())]() mutable -> std::unique_ptr { + llvm::SmallString<256> File(Dir); + llvm::sys::path::append(File, + std::to_string(Counter.fetch_add(1)) + ".html"); + std::error_code EC; + auto OS = std::make_unique(File, EC); + if (EC) { + llvm::errs() << "Failed to create log " << File << ": " << EC.message() + << "\n"; + return std::make_unique(); + } + return OS; + }; + return Logger::html(std::move(StreamFactory)); +} + DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S, Options Opts) : S(std::move(S)), TrueVal(create()), @@ -386,7 +417,7 @@ // based tools. if (Opts.Log == nullptr) { if (DataflowLog.getNumOccurrences() > 0) { - LogOwner = Logger::textual(llvm::errs()); + LogOwner = makeLoggerFromCommandLine(); this->Opts.Log = LogOwner.get(); // FIXME: if the flag is given a value, write an HTML log to a file. } else { diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.html b/clang/lib/Analysis/FlowSensitive/HTMLLogger.html new file mode 100644 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.html @@ -0,0 +1,73 @@ + + + + + + + + + + +
+
Timeline
+ +
+ +
+
Function
+
+
+
+ +
+
+
+ +
+ + +
+
+ +
+ +
+ + + + + diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -0,0 +1,451 @@ +//===-- HTMLLogger.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the HTML logger. Given a directory dir/, we write +// dir/0.html for the first analysis, etc. +// These files contain a visualization that allows inspecting the CFG and the +// state of the analysis at each point. +// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded +// so each output file is self-contained. +// +// VIEWS +// +// The timeline and function view are always shown. These allow selecting basic +// blocks, statements within them, and processing iterations (BBs are visited +// multiple times when e.g. loops are involved). +// These are written directly into the HTML body. +// +// There are also listings of particular basic blocks, and dumps of the state +// at particular analysis points (i.e. BB2 iteration 3 statement 2). +// These are only shown when the relevant BB/analysis point is *selected*. +// +// DATA AND TEMPLATES +// +// The HTML proper is mostly static. +// The analysis data is in a JSON object HTMLLoggerData which is embedded as +// a \n"; + + writeCode(); + writeCFG(); + + *OS << "\n"; + *OS << llvm::StringRef(HTMLLogger_html).split("").second; + } + + void enterBlock(const CFGBlock &B) override { + Iters.emplace_back(&B, ++BlockIters[&B]); + ElementIndex = 0; + } + void enterElement(const CFGElement &E) override { + ++ElementIndex; + } + + static std::string blockID(unsigned Block) { + return llvm::formatv("B{0}", Block); + } + static std::string eltID(unsigned Block, unsigned Element) { + return llvm::formatv("B{0}.{1}", Block, Element); + } + static std::string iterID(unsigned Block, unsigned Iter) { + return llvm::formatv("B{0}:{1}", Block, Iter); + } + static std::string elementIterID(unsigned Block, unsigned Iter, + unsigned Element) { + return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); + } + + // Write the analysis state associated with a particular analysis point. + // FIXME: this dump is fairly opaque. We should show: + // - values associated with the current Stmt + // - values associated with its children + // - meaningful names for values + // - which boolean values are implied true/false by the flow condition + void recordState(TypeErasedDataflowAnalysisState &State) override { + unsigned Block = Iters.back().first->getBlockID(); + unsigned Iter = Iters.back().second; + JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { + JOS->attribute("block", blockID(Block)); + JOS->attribute("iter", Iter); + JOS->attribute("element", ElementIndex); + if (!ContextLogs.empty()) { + JOS->attribute("logs", ContextLogs); + ContextLogs.clear(); + } + { + std::string BuiltinLattice; + llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); + State.Env.dump(BuiltinLatticeS); + JOS->attribute("builtinLattice", BuiltinLattice); + } + }); + } + void blockConverged() override { logText("Block converged"); } + + void logText(llvm::StringRef S) override { + ContextLogs.append(S.begin(), S.end()); + ContextLogs.push_back('\n'); + } + +private: + // Write the CFG block details. + // Currently this is just the list of elements in execution order. + // FIXME: an AST dump would be a useful view, too. + void writeBlock(const CFGBlock &B, unsigned Iters) { + JOS->attributeObject(blockID(B.getBlockID()), [&] { + JOS->attribute("iters", Iters); + JOS->attributeArray("elements", [&] { + for (const auto &Elt : B.Elements) { + std::string Dump; + llvm::raw_string_ostream DumpS(Dump); + Elt.dumpToStream(DumpS); + JOS->value(Dump); + } + }); + }); + } + + // Write the code of function being examined. + // We want to overlay the code with s that mark which BB particular + // tokens are associated with, and even which BB element (so that clicking + // can select the right element). + void writeCode() { + if (!CFG->getDecl()) + return; + const auto &AST = CFG->getDecl()->getASTContext(); + bool Invalid = false; + + // Extract the source code from the original file. + // Pretty-printing from the AST would probably be nicer (no macros or + // indentation to worry about), but we need the boundaries of particular + // AST nodes and the printer doesn't provide this. + auto Range = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (Range.isInvalid()) + return; + llvm::StringRef Code = clang::Lexer::getSourceText( + Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); + if (Invalid) + return; + + static constexpr unsigned Missing = -1; + // TokenInfo stores the BB and set of elements that a token is part of. + struct TokenInfo { + // The basic block this is part of. + // This is the BB of the stmt with the smallest containing range. + unsigned BB = Missing; + unsigned BBPriority = 0; + // The most specific stmt this is part of (smallest range). + unsigned Elt = Missing; + unsigned EltPriority = 0; + // All stmts this is part of. + SmallVector Elts; + + // Mark this token as being part of BB.Elt. + // RangeLen is the character length of the element's range, used to + // distinguish inner vs outer statements. + // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". + // However "a" has a smaller range, so is more specific. Clicking on the + // token "a" should select the stmt "a". + void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { + // A worse BB (larger range) => ignore. + if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) + return; + if (BB != this->BB) { + this->BB = BB; + Elts.clear(); + BBPriority = RangeLen; + } + BBPriority = std::min(BBPriority, RangeLen); + Elts.push_back(Elt); + if (this->Elt == Missing || EltPriority > RangeLen) + this->Elt = Elt; + } + bool operator==(const TokenInfo &Other) const { + return std::tie(BB, Elt, Elts) == + std::tie(Other.BB, Other.Elt, Other.Elts); + } + // Write the attributes for the on this token. + void write(llvm::raw_ostream &OS) const { + OS << "class='c"; + if (BB != Missing) + OS << " " << blockID(BB); + for (unsigned Elt : Elts) + OS << " " << eltID(BB, Elt); + OS << "'"; + + if (Elt != Missing) + OS << " data-elt='" << eltID(BB, Elt) << "'"; + if (BB != Missing) + OS << " data-bb='" << blockID(BB) << "'"; + } + }; + + // Construct one TokenInfo per character in a flat array. + // This is inefficient (chars in a token all have the same info) but simple. + std::vector State(Code.size()); + for (const auto *Block : CFG->getCFG()) { + unsigned EltIndex = 0; + for (const auto& Elt : *Block) { + ++EltIndex; + if (const auto S = Elt.getAs()) { + auto EltRange = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (EltRange.isInvalid()) + continue; + if (EltRange.getBegin() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd() || + EltRange.getEnd() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd()) + continue; + unsigned Off = EltRange.getBegin().getRawEncoding() - + Range.getBegin().getRawEncoding(); + unsigned Len = EltRange.getEnd().getRawEncoding() - + EltRange.getBegin().getRawEncoding(); + for (unsigned I = 0; I < Len; ++I) + State[Off + I].assign(Block->getBlockID(), EltIndex, Len); + } + } + } + + // Finally, write the code with the correct s. + unsigned Line = + AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); + *OS << ""; + } + + // Write the CFG diagram, a graph of basic blocks. + // Laying out graphs is hard, so we construct a graphviz description and shell + // out to `dot` to turn it into an SVG. + void writeCFG() { + *OS << "\n"; + } + + // Produce a graphviz description of a CFG. + static std::string buildCFGDot(const clang::CFG &CFG) { + std::string Graph; + llvm::raw_string_ostream GraphS(Graph); + // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. + GraphS << R"(digraph { + tooltip=" " + node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] + edge[tooltip = " "] +)"; + for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) + GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; + for (const auto *Block : CFG) { + for (const auto &Succ : Block->succs()) { + GraphS << " " << blockID(Block->getBlockID()) << " -> " + << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; + } + } + GraphS << "}\n"; + return Graph; + } +}; + +// Nothing interesting here, just subprocess/temp-file plumbing. +llvm::Expected renderSVG(llvm::StringRef DotGraph) { + auto Dot = llvm::sys::findProgramByName("dot"); + if (!Dot) + return llvm::createStringError(Dot.getError(), + "Can't draw CFG: 'dot' not found on PATH"); + + // Create input and output files for `dot` subprocess. + // (We create the output file as empty, to reserve the temp filename). + llvm::SmallString<256> Input, Output; + int InputFD; + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, + Input)) + return llvm::createStringError(EC, "failed to create `dot` temp input"); + llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; + auto DeleteInput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) + return llvm::createStringError(EC, "failed to create `dot` temp output"); + auto DeleteOutput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); + + std::vector> Redirects = { + Input, Output, + /*stderr=*/std::nullopt}; + std::string ErrMsg; + int Code = llvm::sys::ExecuteAndWait( + *Dot, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + if (!ErrMsg.empty()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed: " + ErrMsg); + if (Code != 0) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed (" + llvm::Twine(Code) + ")"); + + auto Buf = llvm::MemoryBuffer::getFile(Output); + if (!Buf) + return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); + + // Output has prefix we don't want. Skip to tag. + llvm::StringRef Result = Buf.get()->getBuffer(); + auto Pos = Result.find(" tag in `dot` output"); + return Result.substr(Pos).str(); +} + +} // namespace + +std::unique_ptr +Logger::html(std::function()> Streams) { + return std::make_unique(std::move(Streams)); +} + +} // namespace clang::dataflow diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.css b/clang/lib/Analysis/FlowSensitive/HTMLLogger.css new file mode 100644 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.css @@ -0,0 +1,118 @@ +/*===-- HTMLLogger.css ----------------------------------------------------=== +* +* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +* See https://llvm.org/LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +*===----------------------------------------------------------------------===*/ +html { font-family: sans-serif; } +body { margin: 0; display: flex; justify-content: left; } +body > * { box-sizing: border-box; } +body > section { + border: 1px solid black; + min-width: 20em; + overflow: auto; + max-height: 100vh; +} +section header { + background-color: #008; + color: white; + font-weight: bold; + font-size: large; +} +section h2 { + font-size: medium; + margin-bottom: 0.5em; + padding-top: 0.5em; + border-top: 1px solid #aaa; +} +#timeline { + min-width: 0; +} +#timeline .entry.hover { + background-color: #aaa; +} +#timeline .entry.iter-select { + background-color: #aac; +} + +#bb-elements { + font-family: monospace; + font-size: x-small; + border-collapse: collapse; +} +#bb-elements td:nth-child(1) { + text-align: right; + width: 4em; + border-right: 1px solid #008; + padding: 0.3em 0.5em; + + font-weight: bold; + color: #888; +}; +#bb-elements tr.hover { + background-color: #abc; +} +#bb-elements tr.elt-select { + background-color: #acf; +} +#iterations { + display: flex; +} +#iterations .chooser { + flex-grow: 1; + text-align: center; +} +#iterations .chooser:not(.iter-select).hover { + background-color: #aaa; +} +#iterations .iter-select { + font-weight: bold; + background-color: #ccc; +} +#iterations .chooser:not(.iter-select) { + text-decoration: underline; + color: blue; +} + +code.filename { + font-weight: bold; + color: black; + background-color: #ccc; + display: block; + text-align: center; +} +code.line { + display: block; + white-space: pre; +} +code.line:before { /* line numbers */ + content: attr(data-line); + display: inline-block; + width: 2em; + text-align: right; + padding-right: 2px; + background-color: #ccc; + border-right: 1px solid #888; + margin-right: 8px; +} +code.line:has(.bb-select):before { + border-right: 4px solid black; + margin-right: 5px; +} +.c.hover, .bb.hover { + filter: saturate(200%) brightness(90%); +} +.c.elt-select { + box-shadow: inset 0 -4px 2px -2px #a00; +} +.bb.bb-select polygon { + stroke-width: 4px; + filter: brightness(70%) saturate(150%); +} +.bb { user-select: none; } +.bb polygon { fill: white; } +#cfg { + position: relative; + margin-left: 0.5em; +} diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.js b/clang/lib/Analysis/FlowSensitive/HTMLLogger.js new file mode 100644 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.js @@ -0,0 +1,213 @@ +//===-- HTMLLogger.js -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Based on selected objects, hide/show sections & populate data from templates. +// +// For example, if the selection is {bb="BB4", elt="BB4.6" iter="BB4:2"}: +// - show the "block" and "element" sections +// - re-render templates within these sections (if selection changed) +// - apply "bb-select" to items with class class "BB4", etc +let selection = {}; +function updateSelection(changes, data) { + Object.assign(selection, changes); + + data = Object.create(data); + data.selection = selection; + for (root of document.querySelectorAll('[data-selection]')) + updateSection(root, data); + + for (var k in changes) + applyClassIf(k + '-select', classSelector(changes[k])); +} + +// Given
: +// - hide section if selections x or y are null +// - re-render templates if x or y have changed +function updateSection(root, data) { + let changed = root.selection == null; + root.selection ||= {}; + for (key of root.dataset.selection.split(',')) { + if (!key) continue; + if (data.selection[key] != root.selection[key]) { + root.selection[key] = data.selection[key]; + changed = true; + } + if (data.selection[key] == null) { + root.hidden = true; + return; + } + } + if (changed) { + root.hidden = false; + for (tmpl of root.getElementsByTagName('template')) { + // Clear previously rendered template contents. + while (tmpl.nextSibling && tmpl.nextSibling.inflated) + tmpl.parentNode.removeChild(tmpl.nextSibling); + inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling); + } + } +} + +// Expands template `tmpl` based on input `data`: +// - interpolates {{expressions}} in text and attributes +// -