diff --git a/clang/examples/CMakeLists.txt b/clang/examples/CMakeLists.txt
--- a/clang/examples/CMakeLists.txt
+++ b/clang/examples/CMakeLists.txt
@@ -10,3 +10,5 @@
   add_subdirectory(CallSuperAttribute)
   add_subdirectory(PluginsOrder)
 endif()
+
+add_subdirectory(FlowSensitiveAnalysis)
diff --git a/clang/examples/FlowSensitiveAnalysis/CMakeLists.txt b/clang/examples/FlowSensitiveAnalysis/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang/examples/FlowSensitiveAnalysis/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_clang_tool(clang-dataflow-sample
+  SampleAnalysis.cpp)
+
+target_link_libraries(clang-dataflow-sample
+  PRIVATE
+  clangAnalysisFlowSensitive
+  clangBasic
+  clangTooling
+)
diff --git a/clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp b/clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp
new file mode 100644
--- /dev/null
+++ b/clang/examples/FlowSensitiveAnalysis/SampleAnalysis.cpp
@@ -0,0 +1,98 @@
+//===-- SampleAnalysis.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool runs a trivial analysis using the clang::dataflow framework.
+// Currently this computes only the framework's built-in analysis.
+//
+// It analyzes top-level functions named "target".
+// Usage is `clang-dataflow-sample test.cc -- <compile flags>`
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Tooling/Execution.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace {
+// Prints the error and terminates the tool if `E` holds a failure.
+void require(llvm::Error E) {
+  if (E) {
+    llvm::errs() << toString(std::move(E)) << "\n";
+    exit(1);
+  }
+}
+// Unwraps `E`, terminating the tool if it holds an error instead of a value.
+template <typename T> T require(llvm::Expected<T> E) {
+  require(E.takeError());
+  return std::move(*E);
+}
+} // namespace
+
+namespace clang::dataflow::sample {
+
+// A lattice with a single element: joins never change it.
+struct Lattice {
+  LatticeJoinEffect join(const Lattice &Other) {
+    return LatticeJoinEffect::Unchanged;
+  }
+  bool operator==(const Lattice &Other) const { return true; }
+};
+
+// An analysis that adds nothing to the built-in one except a log message per
+// CFG element.
+struct Analysis : dataflow::DataflowAnalysis<Analysis, Lattice> {
+  using DataflowAnalysis::DataflowAnalysis;
+
+  Lattice initialElement() { return {}; }
+  void transfer(const CFGElement &, Lattice &, Environment &Env) {
+    Env.logger().log([](llvm::raw_ostream &OS) { OS << "transfer!"; });
+  }
+};
+
+// Runs `Analysis` over each top-level function named "target" that has a body,
+// logging the analysis progress to stderr.
+class Consumer : public ASTConsumer {
+  bool HandleTopLevelDecl(DeclGroupRef DG) override {
+    for (const auto *D : DG) {
+      const auto *FD = dyn_cast<FunctionDecl>(D);
+      if (!FD || !FD->getDeclName().isIdentifier() ||
+          FD->getName() != "target" || !FD->hasBody())
+        continue;
+
+      // `Log` is declared before `DACtx` so that it outlives every use.
+      DataflowAnalysisContext::Options Opts;
+      auto Log = Logger::textual(llvm::errs());
+      Opts.Log = Log.get();
+      DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>(),
+                                    Opts);
+      auto &Ctx = FD->getDeclContext()->getParentASTContext();
+
+      auto CFCtx = require(ControlFlowContext::build(FD, *FD->getBody(), Ctx));
+      Analysis A(Ctx);
+      Environment Env(DACtx);
+      // The results are discarded: the log is this tool's only output.
+      require(clang::dataflow::runDataflowAnalysis(CFCtx, A, Env));
+    }
+    return true;
+  }
+};
+
+} // namespace clang::dataflow::sample
+
+int main(int argc, const char **argv) {
+  llvm::cl::OptionCategory OptCategory("");
+  struct Factory {
+    std::unique_ptr<clang::ASTConsumer> newASTConsumer() {
+      return std::make_unique<clang::dataflow::sample::Consumer>();
+    }
+  } F;
+  require(require(clang::tooling::createExecutorFromCommandLineArgs(
+                      argc, argv, OptCategory))
+              ->execute(clang::tooling::newFrontendActionFactory(&F)));
+}
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -34,6 +34,7 @@
 
 namespace clang {
 namespace dataflow {
+class Logger;
 
 /// Skip past nodes that the CFG does not emit. These nodes are invisible to
 /// flow-sensitive analysis, and should be ignored as they will effectively not
@@ -67,6 +68,10 @@
     /// fundamentally limited: some constructs, such as recursion, are
     /// explicitly unsupported.
     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
+
+    /// If provided, analysis details will be recorded here.
+    /// The logger is not owned and must outlive the analysis context.
+    Logger *Log = nullptr;
   };
 
   /// Constructs a dataflow analysis context.
@@ -76,7 +81,8 @@
   /// `S` must not be null.
   DataflowAnalysisContext(std::unique_ptr<Solver> S,
                           Options Opts = Options{
-                              /*ContextSensitiveOpts=*/std::nullopt})
+                              /*ContextSensitiveOpts=*/std::nullopt,
+                              /*Log=*/nullptr})
       : S(std::move(S)), TrueVal(createAtomicBoolValue()),
         FalseVal(createAtomicBoolValue()), Opts(Opts) {
     assert(this->S != nullptr);
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -22,6 +22,7 @@
 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/Logger.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
@@ -177,10 +178,17 @@
   /// with a symbolic representation of the `this` pointee.
   Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);
 
-  const DataflowAnalysisContext::Options &getAnalysisOptions() {
+  const DataflowAnalysisContext::Options &getAnalysisOptions() const {
     return DACtx->getOptions();
   }
 
+  /// Returns the logger configured in the analysis options, or the no-op
+  /// `Logger::null()` if none was provided.
+  Logger &logger() const {
+    Logger *Log = DACtx->getOptions().Log;
+    return Log ? *Log : Logger::null();
+  }
+
   /// Creates and returns an environment to use for an inline analysis of the
   /// callee. Uses the storage location from each argument in the `Call` as the
   /// storage location for the corresponding parameter in the callee.
diff --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h
@@ -0,0 +1,86 @@
+//===-- Logger.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H
+
+#include "clang/Analysis/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+
+namespace clang::dataflow {
+
+class ControlFlowContext;
+class TypeErasedDataflowAnalysis;
+struct TypeErasedDataflowAnalysisState;
+
+// A logger is notified as the analysis progresses.
+// It can produce a report of the analysis's findings and how it came to them.
+//
+// The framework reports key structural events (e.g. traversal of blocks).
+// The specific analysis can add extra details to be presented in context.
+class Logger {
+public:
+  // Returns a dummy logger that does nothing.
+  static Logger &null();
+  // A logger that simply writes messages to the specified ostream in real time.
+  static std::unique_ptr<Logger> textual(llvm::raw_ostream &);
+
+  virtual ~Logger() = default;
+
+  // Called by the framework as we start analyzing a new function or statement.
+  // Forms a pair with endAnalysis().
+  virtual void beginAnalysis(const ControlFlowContext &,
+                             TypeErasedDataflowAnalysis &) {}
+  virtual void endAnalysis() {}
+
+  // At any time during the analysis, we're computing the state for some target
+  // program point.
+
+  // Called when we start (re-)processing a block in the CFG.
+  // The target program point is the entry to the specified block.
+  // Calls to log() describe transferBranch(), join() etc.
+  virtual void enterBlock(const CFGBlock &) {}
+  // Called when we start processing an element in the current CFG block.
+  // The target program point is after the specified element.
+  // Calls to log() describe the transfer() function.
+  virtual void enterElement(const CFGElement &) {}
+
+  // Records the analysis state computed for the current program point.
+  virtual void recordState(TypeErasedDataflowAnalysisState &) {}
+  // Records that the analysis state for the current block is now final.
+  virtual void blockConverged() {}
+
+  // Called by the framework or user code to report some event.
+  // The event is associated with the current context (program point).
+  // The Emit function produces the log message. It may or may not be called,
+  // depending on if the logger is interested; it should have no side effects.
+  void log(llvm::function_ref<void(llvm::raw_ostream &)> Emit) {
+    if (!ShouldLogText)
+      return;
+    std::string S;
+    llvm::raw_string_ostream OS(S);
+    Emit(OS);
+    // str() flushes any buffered text into S before it is read.
+    logText(OS.str());
+  }
+
+protected:
+  // ShouldLogText should be false for trivial loggers that ignore logText().
+  // This allows log() to skip evaluating its Emit function.
+  explicit Logger(bool ShouldLogText = true) : ShouldLogText(ShouldLogText) {}
+
+private:
+  bool ShouldLogText;
+  virtual void logText(llvm::StringRef) {}
+};
+
+} // namespace clang::dataflow
+
+#endif
diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
--- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -2,6 +2,7 @@
   ControlFlowContext.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
+  Logger.cpp
   Transfer.cpp
   TypeErasedDataflowAnalysis.cpp
   Value.cpp
diff --git a/clang/lib/Analysis/FlowSensitive/Logger.cpp b/clang/lib/Analysis/FlowSensitive/Logger.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/Logger.cpp
@@ -0,0 +1,116 @@
+//===-- Logger.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/WithColor.h"
+
+namespace clang::dataflow {
+
+Logger &Logger::null() {
+  // ShouldLogText is false so that log() never evaluates its Emit callback.
+  struct NullLogger final : Logger {
+    NullLogger() : Logger(/*ShouldLogText=*/false) {}
+  };
+  // Never deleted, so the reference stays valid for the whole program.
+  static auto *Instance = new NullLogger();
+  return *Instance;
+}
+
+namespace {
+// Writes a plain-text trace of the analysis to a stream as events arrive.
+struct TextualLogger final : Logger {
+  llvm::raw_ostream &OS;
+  const CFG *CurrentCFG = nullptr;
+  const CFGBlock *CurrentBlock = nullptr;
+  const CFGElement *CurrentElement = nullptr;
+  unsigned CurrentElementIndex = 0;
+  bool ShowColors;
+  // Number of times each block has been entered so far.
+  llvm::DenseMap<const CFGBlock *, unsigned> VisitCount;
+  TypeErasedDataflowAnalysis *CurrentAnalysis = nullptr;
+
+  explicit TextualLogger(llvm::raw_ostream &OS)
+      : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {}
+
+  void beginAnalysis(const ControlFlowContext &CFCtx,
+                     TypeErasedDataflowAnalysis &Analysis) override {
+    {
+      llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+      OS << "=== Beginning data flow analysis ===\n";
+    }
+    if (auto *D = CFCtx.getDecl()) {
+      D->print(OS);
+      OS << "\n";
+      D->dump(OS);
+    }
+    CurrentCFG = &CFCtx.getCFG();
+    CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors);
+    CurrentAnalysis = &Analysis;
+  }
+  void endAnalysis() override {
+    llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+    unsigned Blocks = 0, Steps = 0;
+    for (const auto &E : VisitCount) {
+      ++Blocks;
+      Steps += E.second;
+    }
+    // Report on OS (not stderr) so the summary stays with the rest of the log.
+    OS << "=== Finished analysis: " << Blocks << " blocks in " << Steps
+       << " total steps ===\n";
+  }
+  void enterBlock(const CFGBlock &Block) override {
+    unsigned Count = ++VisitCount[&Block];
+    {
+      llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+      OS << "=== Entering block B" << Block.getBlockID() << " (iteration "
+         << Count << ") ===\n";
+    }
+    Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(),
+                ShowColors);
+    CurrentBlock = &Block;
+    CurrentElement = nullptr;
+    CurrentElementIndex = 0;
+  }
+  void enterElement(const CFGElement &Element) override {
+    ++CurrentElementIndex;
+    CurrentElement = &Element;
+    {
+      llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+                                /*Bold=*/true);
+      OS << "Processing element B" << CurrentBlock->getBlockID() << "."
+         << CurrentElementIndex << ": ";
+      Element.dumpToStream(OS);
+    }
+  }
+  void recordState(TypeErasedDataflowAnalysisState &State) override {
+    {
+      llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+                                /*Bold=*/true);
+      OS << "Computed state for B" << CurrentBlock->getBlockID() << "."
+         << CurrentElementIndex << ":\n";
+    }
+    // FIXME: currently the environment dump is verbose and unenlightening.
+    // FIXME: dump the user-defined lattice, too.
+    State.Env.dump(OS);
+    OS << "\n";
+  }
+  void blockConverged() override {
+    OS << "B" << CurrentBlock->getBlockID() << " has converged!\n";
+  }
+  void logText(llvm::StringRef S) override { OS << S << "\n"; }
+};
+} // namespace
+
+std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) {
+  return std::make_unique<TextualLogger>(OS);
+}
+
+} // namespace clang::dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -189,7 +189,10 @@
                   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
                       BlockStates)
       : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv),
-        BlockStates(BlockStates) {}
+        Log(InitEnv.logger()), BlockStates(BlockStates) {
+    Log.beginAnalysis(CFCtx, Analysis);
+  }
+  ~AnalysisContext() { Log.endAnalysis(); }
 
   /// Contains the CFG being analyzed.
   const ControlFlowContext &CFCtx;
@@ -197,6 +200,8 @@
   TypeErasedDataflowAnalysis &Analysis;
   /// Initial state to start the analysis.
   const Environment &InitEnv;
+  /// Receives the events of this analysis; obtained from `InitEnv`.
+  Logger &Log;
   /// Stores the state of a CFG block if it has been evaluated by the analysis.
   /// The indices correspond to the block IDs.
   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates;
@@ -366,8 +371,11 @@
                  std::function<void(const CFGElement &,
                                     const TypeErasedDataflowAnalysisState &)>
                      PostVisitCFG = nullptr) {
+  AC.Log.enterBlock(Block);
   auto State = computeBlockInputState(Block, AC);
+  AC.Log.recordState(State);
   for (const auto &Element : Block) {
+    AC.Log.enterElement(Element);
     // Built-in analysis
     if (AC.Analysis.builtinOptions()) {
       builtinTransfer(Element, State, AC);
@@ -380,6 +388,7 @@
     if (PostVisitCFG) {
       PostVisitCFG(Element, State);
     }
+    AC.Log.recordState(State);
   }
   return State;
 }
@@ -460,15 +469,18 @@
         LatticeJoinEffect Effect2 =
             NewBlockState.Env.widen(OldBlockState->Env, Analysis);
         if (Effect1 == LatticeJoinEffect::Unchanged &&
-            Effect2 == LatticeJoinEffect::Unchanged)
+            Effect2 == LatticeJoinEffect::Unchanged) {
           // The state of `Block` didn't change from widening so there's no need
           // to revisit its successors.
+          AC.Log.blockConverged();
           continue;
+        }
       } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice,
                                             NewBlockState.Lattice) &&
                  OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
         // The state of `Block` didn't change after transfer so there's no need
         // to revisit its successors.
+        AC.Log.blockConverged();
         continue;
       }
     }