diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -92,10 +92,25 @@ return L1 == L2; } - void transferTypeErased(const Stmt *Stmt, TypeErasedLattice &E, + /// To deprecate. Use the more general `transferCFGElement` function. + /// + /// Transfer function for statements in the code being analysed. + virtual void transfer(const Stmt *Stmt, Lattice &L, Environment &Env) {} + + /// Transfer function for elements in the control flow graph built from the + /// code being analysed. + virtual void transferCFGElement(const CFGElement *Element, Lattice &L, + Environment &Env) {} + + void transferTypeErased(const CFGElement *Element, TypeErasedLattice &E, Environment &Env) final { Lattice &L = llvm::any_cast(E.Value); - static_cast(this)->transfer(Stmt, L, Env); + transferCFGElement(Element, L, Env); + + // To deprecate. + if (Element->getKind() == CFGElement::Statement) { + transfer(Element->getAs()->getStmt(), L, Env); + } } private: @@ -116,32 +131,33 @@ /// returned vector, if any, will have the same size as the number of CFG /// blocks, with indices corresponding to basic block IDs. Returns an error if /// the dataflow analysis cannot be performed successfully. Otherwise, calls -/// `PostVisitStmt` on each statement with the final analysis results at that -/// program point. +/// `PostVisit` on each element with the final analysis results at that program +/// point. template llvm::Expected>>> -runDataflowAnalysis( +runDataflowAnalysisOnCFG( const ControlFlowContext &CFCtx, AnalysisT &Analysis, const Environment &InitEnv, - std::function &)> - PostVisitStmt = nullptr) { - std::function - PostVisitStmtClosure = nullptr; - if (PostVisitStmt != nullptr) { - PostVisitStmtClosure = [&PostVisitStmt]( - const CFGStmt &Stmt, - const TypeErasedDataflowAnalysisState &State) { - auto *Lattice = - llvm::any_cast(&State.Lattice.Value); - PostVisitStmt(Stmt, DataflowAnalysisState{ - *Lattice, State.Env}); - }; + std::function &)> + PostVisit = nullptr) { + std::function + PostVisitTypeErased = nullptr; + if (PostVisit) { + PostVisitTypeErased = + [&PostVisit](const CFGElement &Element, + const TypeErasedDataflowAnalysisState &State) { + auto *Lattice = + llvm::any_cast(&State.Lattice.Value); + PostVisit(Element, DataflowAnalysisState{ + *Lattice, State.Env}); + }; } auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( - CFCtx, Analysis, InitEnv, PostVisitStmtClosure); + CFCtx, Analysis, InitEnv, PostVisitTypeErased); if (!TypeErasedBlockStates) return TypeErasedBlockStates.takeError(); @@ -162,6 +178,41 @@ return BlockStates; } +/// To deprecate. +/// +/// Performs dataflow analysis and returns a mapping from basic block IDs to +/// dataflow analysis states that model the respective basic blocks. The +/// returned vector, if any, will have the same size as the number of CFG +/// blocks, with indices corresponding to basic block IDs. Returns an error if +/// the dataflow analysis cannot be performed successfully. Otherwise, calls +/// `PostVisitStmt` on each statement with the final analysis results at that +/// program point. +template +llvm::Expected>>> +runDataflowAnalysis( + const ControlFlowContext &CFCtx, AnalysisT &Analysis, + const Environment &InitEnv, + std::function &)> + PostVisitStmt = nullptr) { + std::function &)> + PostVisit = nullptr; + if (PostVisitStmt) { + PostVisit = + [&PostVisitStmt]( + const CFGElement &Element, + const DataflowAnalysisState &State) { + if (Element.getKind() == CFGElement::Statement) { + PostVisitStmt(*Element.getAs(), State); + } + }; + } + return runDataflowAnalysisOnCFG(CFCtx, Analysis, InitEnv, PostVisit); +} + /// Abstract base class for dataflow "models": reusable analysis components that /// model a particular aspect of program semantics in the `Environment`. For /// example, a model may capture a type and its related functions. diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -82,9 +82,9 @@ virtual bool isEqualTypeErased(const TypeErasedLattice &, const TypeErasedLattice &) = 0; - /// Applies the analysis transfer function for a given statement and - /// type-erased lattice element. - virtual void transferTypeErased(const Stmt *, TypeErasedLattice &, + /// Applies the analysis transfer function for a given control flow graph + /// element and type-erased lattice element. + virtual void transferTypeErased(const CFGElement *, TypeErasedLattice &, Environment &) = 0; /// Determines whether to apply the built-in transfer functions, which model @@ -109,10 +109,10 @@ : Lattice(std::move(Lattice)), Env(std::move(Env)) {} }; -/// Transfers the state of a basic block by evaluating each of its statements in +/// Transfers the state of a basic block by evaluating each of its elements in /// the context of `Analysis` and the states of its predecessors that are -/// available in `BlockStates`. `HandleTransferredStmt` (if provided) will be -/// applied to each statement in the block, after it is evaluated. +/// available in `BlockStates`. `PostVisit` (if provided) will be applied to +/// each element in the block, after it is evaluated. /// /// Requirements: /// @@ -124,23 +124,23 @@ llvm::ArrayRef> BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis, - std::function - HandleTransferredStmt = nullptr); + PostVisit = nullptr); /// Performs dataflow analysis and returns a mapping from basic block IDs to /// dataflow analysis states that model the respective basic blocks. Indices of /// the returned vector correspond to basic block IDs. Returns an error if the /// dataflow analysis cannot be performed successfully. Otherwise, calls -/// `PostVisitStmt` on each statement with the final analysis results at that +/// `PostVisit` on each element with the final analysis results at that /// program point. llvm::Expected>> runTypeErasedDataflowAnalysis( const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, - std::function - PostVisitStmt = nullptr); + PostVisit = nullptr); } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -154,19 +154,32 @@ TransferOptions TransferOpts; }; +// TODO: Document fields +struct TransferContext { + TransferContext( + TypeErasedDataflowAnalysis &Analysis, const ControlFlowContext &CFCtx, + llvm::ArrayRef> + BlockStates, + const Environment &InitEnv) + : Analysis(Analysis), CFCtx(CFCtx), BlockStates(BlockStates), + InitEnv(InitEnv) {} + + TypeErasedDataflowAnalysis &Analysis; + const ControlFlowContext &CFCtx; + llvm::ArrayRef> BlockStates; + const Environment &InitEnv; +}; + /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// /// Requirements: /// /// All predecessors of `Block` except those with loop back edges must have -/// already been transferred. States in `BlockStates` that are set to +/// already been transferred. States in `TC.BlockStates` that are set to /// `llvm::None` represent basic blocks that are not evaluated yet. -static TypeErasedDataflowAnalysisState computeBlockInputState( - const ControlFlowContext &CFCtx, - llvm::ArrayRef> BlockStates, - const CFGBlock &Block, const Environment &InitEnv, - TypeErasedDataflowAnalysis &Analysis) { +static TypeErasedDataflowAnalysisState +computeBlockInputState(const CFGBlock &Block, TransferContext &TC) { llvm::DenseSet Preds; Preds.insert(Block.pred_begin(), Block.pred_end()); if (Block.getTerminator().isTemporaryDtorsBranch()) { @@ -193,13 +206,16 @@ // // See `NoreturnDestructorTest` for concrete examples. if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { - auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); - assert(StmtBlock != CFCtx.getStmtToBlock().end()); + auto StmtBlock = + TC.CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); + assert(StmtBlock != TC.CFCtx.getStmtToBlock().end()); Preds.erase(StmtBlock->getSecond()); } } llvm::Optional MaybeState; + + auto &Analysis = TC.Analysis; bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer(); for (const CFGBlock *Pred : Preds) { @@ -210,14 +226,14 @@ // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a // loop back edge to `Block`. const llvm::Optional &MaybePredState = - BlockStates[Pred->getBlockID()]; + TC.BlockStates[Pred->getBlockID()]; if (!MaybePredState) continue; TypeErasedDataflowAnalysisState PredState = MaybePredState.value(); if (ApplyBuiltinTransfer) { if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { - const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates); + const StmtToEnvMapImpl StmtToEnv(TC.CFCtx, TC.BlockStates); TerminatorVisitor(StmtToEnv, PredState.Env, blockIndexInPredecessor(*Pred, Block), Analysis.builtinTransferOptions()) @@ -236,106 +252,116 @@ // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` // to enable building analyses like computation of dominators that // initialize the state of each basic block differently. - MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv); + MaybeState.emplace(Analysis.typeErasedInitialElement(), TC.InitEnv); } return *MaybeState; } -/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. -/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it -/// is evaluated. -static void transferCFGStmt( - const ControlFlowContext &CFCtx, - llvm::ArrayRef> BlockStates, - const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, - TypeErasedDataflowAnalysisState &State, - std::function - HandleTransferredStmt) { +/// Built-in transfer function for `CfgStmt`. +void builtInTransfer(const CFGStmt &CfgStmt, + TypeErasedDataflowAnalysisState &InputState, + TransferContext TC) { const Stmt *S = CfgStmt.getStmt(); assert(S != nullptr); - - if (Analysis.applyBuiltinTransfer()) - transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env, - Analysis.builtinTransferOptions()); - Analysis.transferTypeErased(S, State.Lattice, State.Env); - - if (HandleTransferredStmt != nullptr) - HandleTransferredStmt(CfgStmt, State); + transfer(StmtToEnvMapImpl(TC.CFCtx, TC.BlockStates), *S, InputState.Env, + TC.Analysis.builtinTransferOptions()); } -/// Transfers `State` by evaluating `CfgInit`. -static void transferCFGInitializer(const CFGInitializer &CfgInit, - TypeErasedDataflowAnalysisState &State) { - const auto &ThisLoc = *cast( - State.Env.getThisPointeeStorageLocation()); +/// Built-in transfer function for `CfgInit`. +void builtInTransfer(const CFGInitializer &CfgInit, + TypeErasedDataflowAnalysisState &InputState) { + const CXXCtorInitializer *Init = CfgInit.getInitializer(); + assert(Init != nullptr); - const CXXCtorInitializer *Initializer = CfgInit.getInitializer(); - assert(Initializer != nullptr); + auto &Env = InputState.Env; + const auto &ThisLoc = + *cast(Env.getThisPointeeStorageLocation()); - const FieldDecl *Member = Initializer->getMember(); + const FieldDecl *Member = Init->getMember(); if (Member == nullptr) // Not a field initializer. return; - auto *InitStmt = Initializer->getInit(); + auto *InitStmt = Init->getInit(); assert(InitStmt != nullptr); - auto *InitStmtLoc = - State.Env.getStorageLocation(*InitStmt, SkipPast::Reference); + auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference); if (InitStmtLoc == nullptr) return; - auto *InitStmtVal = State.Env.getValue(*InitStmtLoc); + auto *InitStmtVal = Env.getValue(*InitStmtLoc); if (InitStmtVal == nullptr) return; if (Member->getType()->isReferenceType()) { auto &MemberLoc = ThisLoc.getChild(*Member); - State.Env.setValue(MemberLoc, - State.Env.takeOwnership( - std::make_unique(*InitStmtLoc))); + Env.setValue(MemberLoc, Env.takeOwnership(std::make_unique( + *InitStmtLoc))); } else { auto &MemberLoc = ThisLoc.getChild(*Member); - State.Env.setValue(MemberLoc, *InitStmtVal); + Env.setValue(MemberLoc, *InitStmtVal); } } +/// Transfers `State` by evaluating each element in the `Block` based on the +/// `TC.Analysis` specified. +/// +/// Built in transfer functions (if the option for `ApplyBuiltinTransfer` is set +/// by the analysis) will be applied to the element before evaluation by the +/// user-specified analysis. +/// `PostVisit` (if provided) will be applied to the element after evaluation by +/// the user-specified analysis. +TypeErasedDataflowAnalysisState +transferCFGBlock(const CFGBlock &Block, TransferContext TC, + std::function + PostVisit = nullptr) { + auto State = computeBlockInputState(Block, TC); + for (const auto &Element : Block) { + // Built-in analysis + if (TC.Analysis.applyBuiltinTransfer()) { + switch (Element.getKind()) { + case CFGElement::Statement: { + builtInTransfer(*Element.getAs(), State, TC); + break; + } + case CFGElement::Initializer: { + builtInTransfer(*Element.getAs(), State); + break; + } + default: + break; + } + } + // User-provided analysis + TC.Analysis.transferTypeErased(&Element, State.Lattice, State.Env); + // Post processing + if (PostVisit) { + PostVisit(Element, State); + } + } + return State; +} + TypeErasedDataflowAnalysisState transferBlock( const ControlFlowContext &CFCtx, llvm::ArrayRef> BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis, - std::function - HandleTransferredStmt) { - TypeErasedDataflowAnalysisState State = - computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); - for (const CFGElement &Element : Block) { - switch (Element.getKind()) { - case CFGElement::Statement: - transferCFGStmt(CFCtx, BlockStates, *Element.getAs(), Analysis, - State, HandleTransferredStmt); - break; - case CFGElement::Initializer: - if (Analysis.applyBuiltinTransfer()) - transferCFGInitializer(*Element.getAs(), State); - break; - default: - // FIXME: Evaluate other kinds of `CFGElement`. - break; - } - } - return State; + PostVisit) { + TransferContext TC(Analysis, CFCtx, BlockStates, InitEnv); + return transferCFGBlock(Block, TC, PostVisit); } llvm::Expected>> runTypeErasedDataflowAnalysis( const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, - std::function - PostVisitStmt) { + PostVisit) { PostOrderCFGView POV(&CFCtx.getCFG()); ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); @@ -348,14 +374,16 @@ InitEnv}; Worklist.enqueueSuccessors(&Entry); + TransferContext TC(Analysis, CFCtx, BlockStates, InitEnv); + // Bugs in lattices and transfer functions can prevent the analysis from - // converging. To limit the damage (infinite loops) that these bugs can cause, - // limit the number of iterations. + // converging. To limit the damage (infinite loops) that these bugs can + // cause, limit the number of iterations. // FIXME: Consider making the maximum number of iterations configurable. // FIXME: Consider restricting the number of backedges followed, rather than // iterations. - // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number - // of iterations, number of functions that time out, etc. + // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average + // number of iterations, number of functions that time out, etc. static constexpr uint32_t MaxAverageVisitsPerBlock = 4; static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; const uint32_t RelativeMaxIterations = @@ -372,14 +400,14 @@ const llvm::Optional &OldBlockState = BlockStates[Block->getBlockID()]; TypeErasedDataflowAnalysisState NewBlockState = - transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); + transferCFGBlock(*Block, TC); if (OldBlockState && Analysis.isEqualTypeErased(OldBlockState.value().Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { - // The state of `Block` didn't change after transfer so there's no need to - // revisit its successors. + // The state of `Block` didn't change after transfer so there's no need + // to revisit its successors. continue; } @@ -394,14 +422,12 @@ // FIXME: Consider evaluating unreachable basic blocks (those that have a // state set to `llvm::None` at this point) to also analyze dead code. - if (PostVisitStmt) { + if (PostVisit) { for (const CFGBlock *Block : CFCtx.getCFG()) { // Skip blocks that were not evaluated. if (!BlockStates[Block->getBlockID()]) continue; - - transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis, - PostVisitStmt); + transferCFGBlock(*Block, TC, PostVisit); } } @@ -409,4 +435,4 @@ } } // namespace dataflow -} // namespace clang +} // namespace clang \ No newline at end of file diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -71,14 +71,22 @@ std::vector> &BlockStates; }; +// TODO: add example of annotated test code snippet +// +// Runs dataflow analysis (specified from `MakeAnalysis`) and the `PostVisit` +// function (if provided) on the body of the function that matches +// `TargetFuncMatcher` in code snippet `Code`. `VerifyResults` checks that the +// results from the analysis matches the expectations (which can be specified by +// annotations in the `Code`). +// Requires: `AnalysisT` contains a type `Lattice`. template -llvm::Error checkDataflow( +llvm::Error checkDataflowOnCFG( llvm::StringRef Code, ast_matchers::internal::Matcher TargetFuncMatcher, std::function MakeAnalysis, - std::function - PostVisitStmt, + PostVisit, std::function VerifyResults, ArrayRef Args, const tooling::FileContentMappings &VirtualMappedFiles = {}) { llvm::Annotations AnnotatedCode(Code); @@ -112,14 +120,15 @@ Environment Env(DACtx, *F); auto Analysis = MakeAnalysis(Context, Env); - std::function - PostVisitStmtClosure = nullptr; - if (PostVisitStmt != nullptr) { - PostVisitStmtClosure = [&PostVisitStmt, &Context]( - const CFGStmt &Stmt, + std::function + PostVisitClosure = nullptr; + if (PostVisit != nullptr) { + PostVisitClosure = + [&PostVisit, &Context](const CFGElement &Element, const TypeErasedDataflowAnalysisState &State) { - PostVisitStmt(Context, Stmt, State); - }; + PostVisit(Context, Element, State); + }; } llvm::Expected> @@ -130,7 +139,7 @@ llvm::Expected>> MaybeBlockStates = runTypeErasedDataflowAnalysis(*CFCtx, Analysis, Env, - PostVisitStmtClosure); + PostVisitClosure); if (!MaybeBlockStates) return MaybeBlockStates.takeError(); auto &BlockStates = *MaybeBlockStates; @@ -141,6 +150,32 @@ return llvm::Error::success(); } +template +llvm::Error checkDataflow( + llvm::StringRef Code, + ast_matchers::internal::Matcher TargetFuncMatcher, + std::function MakeAnalysis, + std::function + PostVisitStmt, + std::function VerifyResults, ArrayRef Args, + const tooling::FileContentMappings &VirtualMappedFiles = {}) { + + std::function + PostVisit = nullptr; + if (PostVisitStmt != nullptr) { + PostVisit = [&PostVisitStmt](ASTContext &Context, const CFGElement &Element, + const TypeErasedDataflowAnalysisState &State) { + if (Element.getKind() == CFGElement::Statement) { + PostVisitStmt(Context, *Element.getAs(), State); + } + }; + } + return checkDataflowOnCFG(Code, TargetFuncMatcher, MakeAnalysis, PostVisit, + VerifyResults, Args, VirtualMappedFiles); +} + // Runs dataflow on the body of the function that matches `TargetFuncMatcher` in // code snippet `Code`. Requires: `AnalysisT` contains a type `Lattice`. template @@ -157,9 +192,9 @@ const tooling::FileContentMappings &VirtualMappedFiles = {}) { using StateT = DataflowAnalysisState; - return checkDataflow( + return checkDataflowOnCFG( Code, std::move(TargetFuncMatcher), std::move(MakeAnalysis), - /*PostVisitStmt=*/nullptr, + /*PostVisit=*/nullptr, [&VerifyResults](AnalysisData AnalysisData) { if (AnalysisData.BlockStates.empty()) { VerifyResults({}, AnalysisData.ASTCtx); @@ -180,9 +215,12 @@ AnalysisData.CFCtx, AnalysisData.BlockStates, *Block, AnalysisData.Env, AnalysisData.Analysis, [&Results, - &Annotations](const clang::CFGStmt &Stmt, + &Annotations](const clang::CFGElement &Element, const TypeErasedDataflowAnalysisState &State) { - auto It = Annotations.find(Stmt.getStmt()); + // FIXME: extend testing annotations to non statement constructs + if (Element.getKind() != clang::CFGElement::Statement) + return; + auto It = Annotations.find(Element.getAs()->getStmt()); if (It == Annotations.end()) return; auto *Lattice = llvm::any_cast(