diff --git a/clang-tools-extra/pseudo/test/html-forest.c b/clang-tools-extra/pseudo/test/html-forest.c new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/test/html-forest.c @@ -0,0 +1,8 @@ +// RUN: clang-pseudo -source %s -html-forest=%t.html +// RUN: FileCheck %s < %t.html +int main() { +} +// Sanity check for some obvious strings. +// CHECK-DAG: +// CHECK-DAG: "compound-statement" +// CHECK-DAG: main diff --git a/clang-tools-extra/pseudo/tool/CMakeLists.txt b/clang-tools-extra/pseudo/tool/CMakeLists.txt --- a/clang-tools-extra/pseudo/tool/CMakeLists.txt +++ b/clang-tools-extra/pseudo/tool/CMakeLists.txt @@ -2,6 +2,7 @@ add_clang_tool(clang-pseudo ClangPseudo.cpp + HTMLForest.cpp ) clang_target_link_libraries(clang-pseudo @@ -16,3 +17,13 @@ clangPseudoCLI ) +add_custom_command(OUTPUT HTMLForestResources.inc + COMMAND "${Python3_EXECUTABLE}" bundle_resources.py + ${CMAKE_CURRENT_BINARY_DIR}/HTMLForestResources.inc + HTMLForest.css HTMLForest.js HTMLForest.html + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Bundling HTMLForest resources" + DEPENDS bundle_resources.py HTMLForest.css HTMLForest.js HTMLForest.html + VERBATIM) +add_custom_target(clang-pseudo-resources DEPENDS HTMLForestResources.inc) +add_dependencies(clang-pseudo clang-pseudo-resources) diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -8,6 +8,7 @@ #include "clang-pseudo/Bracket.h" #include "clang-pseudo/DirectiveTree.h" +#include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Language.h" #include "clang-pseudo/Token.h" @@ -18,7 +19,6 @@ #include "clang/Basic/LangOptions.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" @@ -45,10 +45,12 @@ desc("Strip directives and select conditional sections")); static opt PrintStatistics("print-statistics", desc("Print GLR parser statistics")); static opt PrintForest("print-forest", desc("Print parse forest")); +static opt HTMLForest("html-forest", desc("output file for HTML forest")); static opt StartSymbol("start-symbol", desc("specify the start symbol to parse"), init("translation-unit")); + static std::string readOrDie(llvm::StringRef Path) { llvm::ErrorOr> Text = llvm::MemoryBuffer::getFile(Path); @@ -62,6 +64,9 @@ namespace clang { namespace pseudo { +// Defined in HTMLForest.cpp +void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &, + const ForestNode &Root, const TokenStream &); namespace { struct NodeStats { @@ -150,6 +155,17 @@ if (PrintForest) llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true); + if (HTMLForest.getNumOccurrences()) { + std::error_code EC; + llvm::raw_fd_ostream HTMLOut(HTMLForest, EC); + if (EC) { + llvm::errs() << "Couldn't write " << HTMLForest << ": " << EC.message() + << "\n"; + return 2; + } + clang::pseudo::writeHTMLForest(HTMLOut, Lang.G, Root, *ParseableStream); + } + if (PrintStatistics) { llvm::outs() << "Forest bytes: " << Arena.bytes() << " nodes: " << Arena.nodeCount() << "\n"; diff --git a/clang-tools-extra/pseudo/tool/HTMLForest.html b/clang-tools-extra/pseudo/tool/HTMLForest.html new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/tool/HTMLForest.html @@ -0,0 +1,15 @@ +
    +
    
    +
    diff --git a/clang-tools-extra/pseudo/tool/HTMLForest.cpp b/clang-tools-extra/pseudo/tool/HTMLForest.cpp
    new file mode 100644
    --- /dev/null
    +++ b/clang-tools-extra/pseudo/tool/HTMLForest.cpp
    @@ -0,0 +1,178 @@
    +//===-- HTMLForest.cpp - browser-based parse forest explorer ---------------===//
    +//
    +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    +// See https://llvm.org/LICENSE.txt for license information.
    +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    +//
    +//===----------------------------------------------------------------------===//
    +//
    +// The plain text forest node dump (clang-pseudo -print-forest) is useful but
    +// hard to reconcile with the code being examined, especially when it is large.
    +//
    +// HTMLForest produces a self-contained HTML file containing both the code and
    +// the forest representation, linking them interactively with javascript.
    +// At any given time, a single parse tree is shown (ambiguities resolved).
    +// The user can switch between ambiguous alternatives.
    +//
    +// +-------+---------------+
    +// |       |        +-----+|
    +// | #tree |  #code |#info||
    +// |       |        +-----+|
    +// |       |               |
    +// +-------+---------------+
    +//
    +// #tree is a hierarchical view of the nodes (nested 
      s), like -print-forest. +// (It is a simple tree, not a DAG, because ambiguities have been resolved). +// Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER). +// +// #code is the source code, annotated with s marking the node ranges. +// These spans are usually invisible (exception: ambiguities are marked), but +// they are used to show and change the selection. +// +// #info is a floating box that shows details of the currently selected node: +// - rule (for sequence nodes). Abbreviated rules are also shown. +// - alternatives (for ambiguous nodes). The user can choose an alternative. +// - context. The parent stack showing how this node fits in translation-unit. +// +// There are two types of 'active' node: +// - *highlight* is what the cursor is over, and is colored blue. +// Near ancestors are shaded faintly (onion-skin) to show local structure. +// - *selection* is set by clicking. +// The #info box shows the selection, and selected nodes have a dashed ring. +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Forest.h" +#include "clang-pseudo/grammar/Grammar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" +namespace clang { +namespace pseudo { +namespace { + +// Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc +#include "HTMLForestResources.inc" + +struct Writer { + llvm::raw_ostream &Out; + const Grammar &G; + const ForestNode &Root; + const TokenStream &Stream; + + void write() { + Out << "\n\n\nHTMLForest\n"; + Out << "\n"; + Out << "\n"; + Out << "\n"; + Out << "\n"; + Out << "\n\n"; + Out << HTMLForest_html; + Out << "\n\n"; + } + + void writeCode(); + void writeForestJSON(); +}; + +void Writer::writeCode() { + // This loop (whitespace logic) is cribbed from TokenStream::Print. + bool FirstToken = true; + unsigned LastLine = -1; + StringRef LastText; + for (const auto &T : Stream.tokens()) { + StringRef Text = T.text(); + if (FirstToken) { + FirstToken = false; + } else if (T.Line == LastLine) { + if (LastText.data() + LastText.size() != Text.data()) + Out << ' '; + } else { + Out << " \n"; // Extra space aids selection. + Out.indent(T.Indent); + } + Out << ""; + llvm::printHTMLEscaped(Text, Out); + Out << ""; + LastLine = T.Line; + LastText = Text; + } + if (!FirstToken) + Out << '\n'; +} + +// Writes a JSON array of forest nodes. Items are e.g.: +// {kind:'sequence', symbol:'compound-stmt', children:[5,8,33], rule:'compound-stmt := ...'} +// {kind:'terminal', symbol:'VOID', token:'t52'} +// {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3} +// {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'} +void Writer::writeForestJSON() { + llvm::DenseMap Index; + std::vector> Queue; + auto AssignID = [&](const ForestNode* N, Token::Index End) -> unsigned { + auto R = Index.try_emplace(N, Queue.size()); + if (R.second) + Queue.push_back({N, End}); + return R.first->second; + }; + AssignID(&Root, Stream.tokens().size()); + auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); }; + + llvm::json::OStream Out(this->Out, 2); + Out.array([&] { + for (unsigned I = 0; I < Queue.size(); ++I) { + const ForestNode *N = Queue[I].first; + Token::Index End = Queue[I].second; + Out.object([&] { + Out.attribute("symbol", G.symbolName(N->symbol())); + switch (N->kind()) { + case ForestNode::Terminal: + Out.attribute("kind", "terminal"); + Out.attribute("token", TokenID(N->startTokenIndex())); + break; + case ForestNode::Sequence: + Out.attribute("kind", "sequence"); + Out.attribute("rule", G.dumpRule(N->rule())); + break; + case ForestNode::Ambiguous: + Out.attribute("kind", "ambiguous"); + Out.attribute("selected", AssignID(N->children().front(), End)); + break; + case ForestNode::Opaque: + Out.attribute("kind", "opaque"); + Out.attribute("firstToken", TokenID(N->startTokenIndex())); + // [firstToken, lastToken] is a closed range. + // If empty, lastToken is omitted. + if (N->startTokenIndex() != End) + Out.attribute("lastToken", TokenID(End - 1)); + break; + } + auto Children = N->children(); + if (!Children.empty()) + Out.attributeArray("children", [&] { + for (unsigned I = 0; I < Children.size(); ++I) + Out.value(AssignID(Children[I], + I + 1 == Children.size() + ? End + : Children[I + 1]->startTokenIndex())); + }); + }); + } + }); +} + +} // namespace + +// We only accept the derived stream here. Would it be useful or confusing +// to allow the original stream instead? +void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G, + const ForestNode &Root, const TokenStream &Stream) { + Writer{OS, G, Root, Stream}.write(); +} + +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/tool/HTMLForest.css b/clang-tools-extra/pseudo/tool/HTMLForest.css new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/tool/HTMLForest.css @@ -0,0 +1,87 @@ +body { + position: absolute; + top: 0; + bottom: 0; + right: 0; + left: 0; + + display: flex; + align-items: stretch; + margin: 0; + font-family: sans-serif; + white-space: nowrap; + height: 100%; +} +body > * { + overflow-y: auto; /* Scroll sections independently*/ + margin: 0; +} + +#code { + font-size: 18px; + line-height: 36px; + flex-grow: 1; + padding-right: 10em; /* Leave space for #info */ +} +#code span { + padding: 9px 0; /* No "gaps" between lines due to line-height */ +} +.node.ambiguous::before, .context.ambiguous::after, .tree-node.ambiguous > header::after { + content: /*the thinking man's emoji*/'\01F914'; +} + +#info { + position: fixed; + right: 2em; + top: 1em; + width: 25em; + border: 1px solid black; + min-height: 20em; + background-color: whiteSmoke; + overflow-x: clip; + box-shadow: 3px 3px 5px rgba(0,0,0,0.2); +} +#info header { + background-color: darkGreen; + color: white; + font-size: larger; + padding: 0.5em; +} +#i_kind { + float: right; + font-size: small; +} +#info section { + padding: 0.5em; + border-top: 1px solid lightGray; + overflow-x: auto; +} +#i_context { + font-size: small; +} + +#tree { + flex-grow: 0; + min-width: 20em; + margin-right: 1em; + border-right: 1px solid darkGray; + background-color: azure; + font-size: small; + overflow-x: auto; +} +#tree ul { + margin: 0; + display: inline-block; + padding-left: 6px; + border-left: 1px solid rgba(0,0,0,0.2); + list-style: none; +} +#tree > ul { border-left: none; } +.tree-node.selected > header .name { font-weight: bold; } +.tree-node.terminal .name { font-family: monospace; } +.tree-node.ambiguous > header .name { color: #803; font-weight: bold; } + +.selected { outline: 1px dashed black; } +.abbrev { opacity: 50%; } +.abbrev::after { content: '~'; } +.opaque { background-color: bisque; } diff --git a/clang-tools-extra/pseudo/tool/HTMLForest.js b/clang-tools-extra/pseudo/tool/HTMLForest.js new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/tool/HTMLForest.js @@ -0,0 +1,284 @@ +// The global map of forest node index => NodeView. +views = []; +// NodeView is a visible forest node. +// It has an entry in the navigation tree, and a span in the code itself. +// Each NodeView is associated with a forest node, but not all nodes have views: +// - nodes not reachable though current ambiguity selection +// - trivial "wrapping" sequence nodes are abbreviated away +class NodeView { + // Builds a node representing forest[index], or its target if it is a wrapper. + // Registers the node in the global map. + static make(index, parent, abbrev) { + var node = forest[index]; + if (node.kind == 'sequence' && node.children.length == 1 && forest[node.children[0]].kind != 'ambiguous') { + abbrev ||= []; + abbrev.push(index); + return NodeView.make(node.children[0], parent, abbrev); + } + return views[index] = new NodeView(index, parent, node, abbrev); + } + + constructor(index, parent, node, abbrev) { + this.abbrev = abbrev || []; + this.parent = parent; + this.children = (node.kind == 'ambiguous' ? [node.selected] : node.children || []).map((c) => NodeView.make(c, this)); + this.index = index; + this.node = node; + views[index] = this; + + this.span = this.buildSpan(); + this.tree = this.buildTree(); + } + + // Replaces the token sequence in #code with a . + buildSpan() { + var elt = document.createElement('span'); + elt.dataset['index'] = this.index; + elt.classList.add("node"); + elt.classList.add("selectable-node"); + elt.classList.add(this.node.kind); + + var begin = null, end = null; + if (this.children.length != 0) { + begin = this.children[0].span; + end = this.children[this.children.length-1].span.nextSibling; + } else if (this.node.kind == 'terminal') { + begin = document.getElementById(this.node.token); + end = begin.nextSibling; + } else if (this.node.kind == 'opaque') { + begin = document.getElementById(this.node.firstToken); + end = (this.node.lastToken == null) ? begin : + document.getElementById(this.node.lastToken).nextSibling; + } + var parent = begin.parentNode; + splice(begin, end, elt); + parent.insertBefore(elt, end); + return elt; + } + + // Returns a (detached)
    • suitable for use in #tree. + buildTree() { + var elt = document.createElement('li'); + elt.dataset['index'] = this.index; + elt.classList.add('tree-node'); + elt.classList.add('selectable-node'); + elt.classList.add(this.node.kind); + var header = document.createElement('header'); + elt.appendChild(header); + + if (this.abbrev.length > 0) { + var abbrev = document.createElement('span'); + abbrev.classList.add('abbrev'); + abbrev.innerText = forest[this.abbrev[0]].symbol; + header.appendChild(abbrev); + } + var name = document.createElement('span'); + name.classList.add('name'); + name.innerText = this.node.symbol; + header.appendChild(name); + + if (this.children.length != 0) { + var sublist = document.createElement('ul'); + this.children.forEach((c) => sublist.appendChild(c.tree)); + elt.appendChild(sublist); + } + return elt; + } + + // Make this view visible on the screen by scrolling if needed. + scrollVisible() { + scrollIntoViewV(document.getElementById('tree'), this.tree.firstChild); + scrollIntoViewV(document.getElementById('code'), this.span); + } + + // Fill #info with details of this node. + renderInfo() { + document.getElementById('i_symbol').innerText = this.node.symbol; + document.getElementById('i_kind').innerText = this.node.kind; + + // For sequence nodes, add LHS := RHS rule. + // If this node abbreviates trivial sequences, we want those rules too. + var rules = document.getElementById('i_rules'); + rules.textContent = ''; + function addRule(i) { + var ruleText = forest[i].rule; + if (ruleText == null) return; + var rule = document.createElement('div'); + rule.classList.add('rule'); + rule.innerText = ruleText; + rules.insertBefore(rule, rules.firstChild); + } + this.abbrev.forEach(addRule); + addRule(this.index); + + // For ambiguous nodes, show a selectable list of alternatives. + var alternatives = document.getElementById('i_alternatives'); + alternatives.textContent = ''; + var that = this; + function addAlternative(i) { + var altNode = forest[i]; + var text = altNode.rule || altNode.kind; + var alt = document.createElement('div'); + alt.classList.add('alternative'); + alt.innerText = text; + alt.dataset['index'] = i; + alt.dataset['parent'] = that.index; + if (i == that.node.selected) + alt.classList.add('selected'); + alternatives.appendChild(alt); + } + if (this.node.kind == 'ambiguous') + this.node.children.forEach(addAlternative); + + // Show the "context stack" of parent nodes. + // The part of each rule that leads to the current node is bolded. + var context = document.getElementById('i_context'); + context.textContent = ''; + var child = this; + for (var view = this.parent; view != null; child=view, view = view.parent) { + var indexInParent = view.children.indexOf(child); + + var ctx = document.createElement('div'); + ctx.classList.add('context'); + ctx.classList.add('selectable-node'); + ctx.classList.add(view.node.kind); + if (view.node.rule) { + // Rule syntax is LHS := RHS1 [annotation] RHS2. + // We walk through the chunks and bold the one at parentInIndex. + var chunkCount = 0; + ctx.innerHTML = view.node.rule.replaceAll(/[^ ]+/g, function(match) { + if (!(match.startsWith('[') && match.endsWith(']')) /*annotations*/ + && chunkCount++ == indexInParent + 2 /*skip LHS :=*/) + return '' + match + ''; + return match; + }); + } else /*ambiguous*/ { + ctx.innerHTML = '' + view.node.symbol + ''; + } + ctx.dataset['index'] = view.index; + if (view.abbrev.length > 0) { + var abbrev = document.createElement('span'); + abbrev.classList.add('abbrev'); + abbrev.innerText = forest[view.abbrev[0]].symbol; + ctx.insertBefore(abbrev, ctx.firstChild); + } + + ctx.dataset['index'] = view.index; + context.appendChild(ctx, context.firstChild); + } + } + + remove() { + this.children.forEach((c) => c.remove()); + splice(this.span.firstChild, null, this.span.parentNode, this.span.nextSibling); + detach(this.span); + delete views[this.index]; + } +}; + +var selection = null; +function selectView(view) { + var old = selection; + selection = view; + if (view == old) + return; + + if (old) { + old.tree.classList.remove('selected'); + old.span.classList.remove('selected'); + } + document.getElementById('info').hidden = (view == null); + if (!view) + return; + view.tree.classList.add('selected'); + view.span.classList.add('selected'); + view.renderInfo(); + view.scrollVisible(); +} + +// To highlight nodes on hover, we create dynamic CSS rules of the form +// .selectable-node[data-index="42"] { background-color: blue; } +// This avoids needing to find all the related nodes and update their classes. +var highlightSheet = new CSSStyleSheet(); +document.adoptedStyleSheets.push(highlightSheet); +var highlightView = function(view) { + var text = ''; + for (const color of ['#6af', '#bbb', '#ddd', '#eee']) { + if (view == null) + break; + text += '.selectable-node[data-index="' + view.index + '"] ' + text += '{ background-color: ' + color + '; }\n'; + view = view.parent; + } + highlightSheet.replace(text); +} + +// Select which branch of an ambiguous node is taken. +function chooseAlternative(parent, index) { + var parentView = views[parent]; + parentView.node.selected = index; + var oldChild = parentView.children[0]; + oldChild.remove(); + var newChild = NodeView.make(index, parentView); + parentView.children[0] = newChild; + parentView.tree.lastChild.replaceChild(newChild.tree, oldChild.tree); + + highlightView(null); + // Force redraw of the info box. + selectView(null); + selectView(parentView); +} + +// Attach event listeners and build content once the document is ready. +document.addEventListener("DOMContentLoaded", function() { + var code = document.getElementById('code'); + var tree = document.getElementById('tree'); + var context = document.getElementById('i_context'); + var alternatives = document.getElementById('i_alternatives'); + + [code, tree, context].forEach(function(container) { + container.addEventListener('click', function(e) { + var nodeElt = e.target.closest('.selectable-node'); + selectView(nodeElt && views[Number(nodeElt.dataset['index'])]); + }); + container.addEventListener('mousemove', function(e) { + var nodeElt = e.target.closest('.selectable-node'); + highlightView(nodeElt && views[Number(nodeElt.dataset['index'])]); + }); + }); + + alternatives.addEventListener('click', function(e) { + var altElt = e.target.closest('.alternative'); + if (altElt) + chooseAlternative( + Number(altElt.dataset['parent']), + Number(altElt.dataset['index'])); + }); + + // The HTML provides #code content in a hidden DOM element, move it. + var hiddenCode = document.getElementById('hidden-code'); + splice(hiddenCode.firstChild, hiddenCode.lastChild, code); + detach(hiddenCode); + + // Build the tree of NodeViews and attach to #tree. + tree.firstChild.appendChild(NodeView.make(0).tree); +}); + +// Helper DOM functions // + +// Moves the sibling range [first, until) into newParent. +function splice(first, until, newParent, before) { + for (var next = first; next != until;) { + var elt = next; + next = next.nextSibling; + newParent.insertBefore(elt, before); + } +} +function detach(node) { node.parentNode.removeChild(node); } +// Like scrollIntoView, but vertical only! +function scrollIntoViewV(container, elt) { + if (container.scrollTop > elt.offsetTop + elt.offsetHeight || + container.scrollTop + container.clientHeight < elt.offsetTop) + container.scrollTo({top:elt.offsetTop, behavior:'smooth'}); +} + diff --git a/clang-tools-extra/pseudo/tool/bundle_resources.py b/clang-tools-extra/pseudo/tool/bundle_resources.py new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/tool/bundle_resources.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# Simple bundler of files into string constants. +# +# Usage: bundle_resources.py foo.inc a.js b.css ... +# Produces foo.inc containing: +# const char a_js[] = "..."; +# const char b_css[] = "..."; +import sys + +outfile = sys.argv[1] +infiles = sys.argv[2:] + +with open(outfile, 'w') as out: + for filename in infiles: + varname = filename.replace('.', '_') + out.write("const char " + varname + "[] = \n"); + # MSVC limits each chunk of string to 2k. + # Not quite enough for the JS file, so split by lines. + # The overall limit is 64k, which ought to be enough for anyone. + for line in open(filename).read().split('\n'): + out.write(' R"x(' + line + ')x" "\\n"\n' ) + out.write(' ;\n');