diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -34,6 +34,7 @@ namespace clang { namespace dataflow { +class Logger; /// Skip past nodes that the CFG does not emit. These nodes are invisible to /// flow-sensitive analysis, and should be ignored as they will effectively not @@ -67,6 +68,11 @@ /// fundamentally limited: some constructs, such as recursion, are /// explicitly unsupported. std::optional ContextSensitiveOpts; + + /// If provided, analysis details will be recorded here. + /// (This is always non-null within an AnalysisContext, the framework + /// provides a fallback no-op logger). + Logger *Log = nullptr; }; /// Constructs a dataflow analysis context. @@ -76,11 +82,9 @@ /// `S` must not be null. DataflowAnalysisContext(std::unique_ptr S, Options Opts = Options{ - /*ContextSensitiveOpts=*/std::nullopt}) - : S(std::move(S)), TrueVal(createAtomicBoolValue()), - FalseVal(createAtomicBoolValue()), Opts(Opts) { - assert(this->S != nullptr); - } + /*ContextSensitiveOpts=*/std::nullopt, + /*Logger=*/nullptr}); + ~DataflowAnalysisContext(); /// Takes ownership of `Loc` and returns a reference to it. /// @@ -393,6 +397,8 @@ // Fields modeled by environments covered by this context. llvm::DenseSet ModeledFields; + + std::unique_ptr LogOwner; // If created via flags. }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -22,6 +22,7 @@ #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" @@ -177,10 +178,12 @@ /// with a symbolic representation of the `this` pointee. Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); - const DataflowAnalysisContext::Options &getAnalysisOptions() { + const DataflowAnalysisContext::Options &getAnalysisOptions() const { return DACtx->getOptions(); } + Logger &logger() const { return *DACtx->getOptions().Log; } + /// Creates and returns an environment to use for an inline analysis of the /// callee. Uses the storage location from each argument in the `Call` as the /// storage location for the corresponding parameter in the callee. diff --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h @@ -0,0 +1,85 @@ +//===-- Logger.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H + +#include "clang/Analysis/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace clang::dataflow { +// Forward declarations so we can use Logger anywhere in the framework. +class ControlFlowContext; +class TypeErasedDataflowAnalysis; +struct TypeErasedDataflowAnalysisState; + +/// A logger is notified as the analysis progresses. +/// It can produce a report of the analysis's findings and how it came to them. +/// +/// The framework reports key structural events (e.g. traversal of blocks). +/// The specific analysis can add extra details to be presented in context. +class Logger { +public: + /// Returns a dummy logger that does nothing. + static Logger &null(); + /// A logger that simply writes messages to the specified ostream in real + /// time. + static std::unique_ptr textual(llvm::raw_ostream &); + + virtual ~Logger() = default; + + /// Called by the framework as we start analyzing a new function or statement. + /// Forms a pair with endAnalysis(). + virtual void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) {} + virtual void endAnalysis() {} + + // At any time during the analysis, we're computing the state for some target + // program point. + + /// Called when we start (re-)processing a block in the CFG. + /// The target program point is the entry to the specified block. + /// Calls to log() describe transferBranch(), join() etc. + virtual void enterBlock(const CFGBlock &) {} + /// Called when we start processing an element in the current CFG block. + /// The target program point is after the specified element. + /// Calls to log() describe the transfer() function. + virtual void enterElement(const CFGElement &) {} + + /// Records the analysis state computed for the current program point. + virtual void recordState(TypeErasedDataflowAnalysisState &) {} + /// Records that the analysis state for the current block is now final. + virtual void blockConverged() {} + + /// Called by the framework or user code to report some event. + /// The event is associated with the current context (program point). + /// The Emit function produces the log message. It may or may not be called, + /// depending on if the logger is interested; it should have no side effects. + void log(llvm::function_ref Emit) { + if (!ShouldLogText) + return; + std::string S; + llvm::raw_string_ostream OS(S); + Emit(OS); + logText(S); + } + +protected: + /// ShouldLogText should be false for trivial loggers that ignore logText(). + /// This allows log() to skip evaluating its Emit function. + Logger(bool ShouldLogText = true) : ShouldLogText(ShouldLogText) {} + +private: + bool ShouldLogText; + virtual void logText(llvm::StringRef) {} +}; + +} // namespace clang::dataflow + +#endif diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ ControlFlowContext.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp + Logger.cpp Transfer.cpp TypeErasedDataflowAnalysis.cpp Value.cpp diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -15,13 +15,20 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include #include #include +static llvm::cl::opt + DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, " + "writes textual log to stderr.")); + namespace clang { namespace dataflow { @@ -375,6 +382,27 @@ return nullptr; } +DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S, + Options Opts) + : S(std::move(S)), TrueVal(createAtomicBoolValue()), + FalseVal(createAtomicBoolValue()), Opts(Opts) { + assert(this->S != nullptr); + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = Logger::textual(llvm::errs()); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. + } else { + this->Opts.Log = &Logger::null(); + } + } +} + +DataflowAnalysisContext::~DataflowAnalysisContext() = default; + } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/Logger.cpp b/clang/lib/Analysis/FlowSensitive/Logger.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,108 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/Support/WithColor.h" + +namespace clang::dataflow { + +Logger &Logger::null() { + struct NullLogger final : Logger {}; + static auto *Instance = new NullLogger(); + return *Instance; +} + +namespace { +struct TextualLogger final : Logger { + llvm::raw_ostream &OS; + const CFG *CurrentCFG; + const CFGBlock *CurrentBlock; + const CFGElement *CurrentElement; + unsigned CurrentElementIndex; + bool ShowColors; + llvm::DenseMap VisitCount; + TypeErasedDataflowAnalysis *CurrentAnalysis; + + TextualLogger(llvm::raw_ostream &OS) + : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {} + + virtual void beginAnalysis(const ControlFlowContext &CFG, + TypeErasedDataflowAnalysis &Analysis) override { + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Beginning data flow analysis ===\n"; + } + if (auto *D = CFG.getDecl()) { + D->print(OS); + OS << "\n"; + D->dump(OS); + } + CurrentCFG = &CFG.getCFG(); + CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors); + CurrentAnalysis = &Analysis; + } + virtual void endAnalysis() override { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + unsigned Blocks = 0, Steps = 0; + for (const auto &E : VisitCount) { + ++Blocks; + Steps += E.second; + } + llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in " + << Steps << " total steps ===\n"; + } + virtual void enterBlock(const CFGBlock &Block) override { + unsigned Count = ++VisitCount[&Block]; + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Entering block B" << Block.getBlockID() << " (iteration " + << Count << ") ===\n"; + } + Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(), + ShowColors); + CurrentBlock = &Block; + CurrentElement = nullptr; + CurrentElementIndex = 0; + } + virtual void enterElement(const CFGElement &Element) override { + ++CurrentElementIndex; + CurrentElement = ∈ + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Processing element B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ": "; + Element.dumpToStream(OS); + } + } + void recordState(TypeErasedDataflowAnalysisState &State) override { + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Computed state for B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ":\n"; + } + // FIXME: currently the environment dump is verbose and unenlightening. + // FIXME: dump the user-defined lattice, too. + State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique(OS); +} + +} // namespace clang::dataflow diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -191,7 +191,10 @@ llvm::ArrayRef> BlockStates) : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), - BlockStates(BlockStates) {} + Log(InitEnv.logger()), BlockStates(BlockStates) { + Log.beginAnalysis(CFCtx, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const ControlFlowContext &CFCtx; @@ -199,6 +202,7 @@ TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; + Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef> BlockStates; @@ -368,8 +372,11 @@ std::function PostVisitCFG = nullptr) { + AC.Log.enterBlock(Block); auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); for (const auto &Element : Block) { + AC.Log.enterElement(Element); // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Element, State, AC); @@ -382,6 +389,7 @@ if (PostVisitCFG) { PostVisitCFG(Element, State); } + AC.Log.recordState(State); } return State; } @@ -462,15 +470,18 @@ LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && - Effect2 == LatticeJoinEffect::Unchanged) + Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; + } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; } } diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -9,6 +9,7 @@ DataflowAnalysisContextTest.cpp DataflowEnvironmentTest.cpp DebugSupportTest.cpp + LoggerTest.cpp MapLatticeTest.cpp MatchSwitchTest.cpp MultiVarConstantPropagationTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp @@ -0,0 +1,152 @@ +#include "TestingSupport.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#include + +namespace clang::dataflow::test { +namespace { + +struct TestLattice { + int Elements = 0; + int Branches = 0; + int Joins = 0; + + LatticeJoinEffect join(const TestLattice &Other) { + if (Joins < 3) { + ++Joins; + Elements += Other.Elements; + Branches += Other.Branches; + return LatticeJoinEffect::Changed; + } + return LatticeJoinEffect::Unchanged; + } + friend bool operator==(const TestLattice &LHS, const TestLattice &RHS) { + return std::tie(LHS.Elements, LHS.Branches, LHS.Joins) == + std::tie(RHS.Elements, RHS.Branches, RHS.Joins); + } +}; + +class TestAnalysis : public DataflowAnalysis { +public: + using DataflowAnalysis::DataflowAnalysis; + + static TestLattice initialElement() { return TestLattice{}; } + void transfer(const CFGElement &, TestLattice &L, Environment &E) { + E.logger().log([](llvm::raw_ostream &OS) { OS << "transfer()"; }); + ++L.Elements; + } + void transferBranch(bool Branch, const Stmt *S, TestLattice &L, + Environment &E) { + E.logger().log([&](llvm::raw_ostream &OS) { + OS << "transferBranch(" << Branch << ")"; + }); + ++L.Branches; + } +}; + +class TestLogger : public Logger { +public: + TestLogger(std::string &S) : OS(S) {} + +private: + llvm::raw_string_ostream OS; + + void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) override { + logText("beginAnalysis()"); + } + void endAnalysis() override { logText("\nendAnalysis()"); } + + void enterBlock(const CFGBlock &B) override { + OS << "\nenterBlock(" << B.BlockID << ")\n"; + } + void enterElement(const CFGElement &E) override { + // we don't want the trailing \n + std::string S; + llvm::raw_string_ostream SS(S); + E.dumpToStream(SS); + + OS << "enterElement(" << llvm::StringRef(S).trim() << ")\n"; + } + void recordState(TypeErasedDataflowAnalysisState &S) override { + const TestLattice &L = llvm::any_cast(S.Lattice.Value); + OS << "recordState(Elements=" << L.Elements << ", Branches=" << L.Branches + << ", Joins=" << L.Joins << ")\n"; + } + /// Records that the analysis state for the current block is now final. + void blockConverged() override { logText("blockConverged()"); } + + void logText(llvm::StringRef Text) override { OS << Text << "\n"; } +}; + +TEST(LoggerTest, Sequence) { + const char *Code = R"cpp( +int target(bool b, int p, int q) { + return b ? p : q; +} +)cpp"; + + auto Inputs = AnalysisInputs( + Code, ast_matchers::hasName("target"), + [](ASTContext &C, Environment &) { return TestAnalysis(C); }); + std::vector Args = { + "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"}; + Inputs.ASTBuildArgs = Args; + std::string Log; + TestLogger Logger(Log); + Inputs.BuiltinOptions.Log = &Logger; + + ASSERT_THAT_ERROR(checkDataflow(std::move(Inputs), + [](const AnalysisOutputs &) {}), + llvm::Succeeded()); + + EXPECT_EQ(Log, R"(beginAnalysis() + +enterBlock(4) +recordState(Elements=0, Branches=0, Joins=0) +enterElement(b) +transfer() +recordState(Elements=1, Branches=0, Joins=0) +enterElement(b (ImplicitCastExpr, LValueToRValue, _Bool)) +transfer() +recordState(Elements=2, Branches=0, Joins=0) + +enterBlock(3) +transferBranch(0) +recordState(Elements=2, Branches=1, Joins=0) +enterElement(q) +transfer() +recordState(Elements=3, Branches=1, Joins=0) + +enterBlock(2) +transferBranch(1) +recordState(Elements=2, Branches=1, Joins=0) +enterElement(p) +transfer() +recordState(Elements=3, Branches=1, Joins=0) + +enterBlock(1) +recordState(Elements=6, Branches=2, Joins=1) +enterElement(b ? p : q) +transfer() +recordState(Elements=7, Branches=2, Joins=1) +enterElement(b ? p : q (ImplicitCastExpr, LValueToRValue, int)) +transfer() +recordState(Elements=8, Branches=2, Joins=1) +enterElement(return b ? p : q;) +transfer() +recordState(Elements=9, Branches=2, Joins=1) + +enterBlock(0) +recordState(Elements=9, Branches=2, Joins=1) + +endAnalysis() +)"); +} + +} // namespace +} // namespace clang::dataflow::test