diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -154,6 +154,30 @@ TransferOptions TransferOpts; }; +/// Returns whether `Block` is a "back edge" in the CFG. Such a block has only +/// one successor, the start of the loop. +static bool isBackEdge(const CFGBlock *Block) { + assert(Block != nullptr); + return Block->LoopTarget != nullptr; +} + +/// Returns the predecessor to `Block` that comes from a "back edge", if one +/// exists. +/// +/// If this function returns a non-null pointer, that means `Block` dominates +/// the back edge block. That is, all paths from the entry block to the back +/// edge block must go through `Block`. +static const CFGBlock *findBackEdge(const CFGBlock *Block) { + assert(Block != nullptr); + for (const auto &Pred : Block->preds()) { + if (!Pred.isReachable()) + continue; + if (isBackEdge(Pred)) + return Pred; + } + return nullptr; +} + /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// @@ -208,7 +232,7 @@ continue; // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a - // loop back edge to `Block`. + // a noreturn element. const llvm::Optional &MaybePredState = BlockStates[Pred->getBlockID()]; if (!MaybePredState) @@ -311,6 +335,7 @@ HandleTransferredStmt) { TypeErasedDataflowAnalysisState State = computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); + for (const CFGElement &Element : Block) { switch (Element.getKind()) { case CFGElement::Statement: @@ -326,9 +351,56 @@ break; } } + + // The back edge block is where we perform a widen, allowing certain analyses + // to converge in finite time. The `PrevState` of the previous iteration of + // the loop is widened to subsume the input `State`. + // + // FIXME: Allow users to configure a certain number of iterations to unroll, + // to allow higher precision in their analyses. + if (isBackEdge(&Block)) { + auto PrevState = BlockStates[Block.getBlockID()]; + assert(PrevState.has_value()); + Analysis.widenTypeErased(PrevState->Lattice, State.Lattice); + // FIXME: Add a widen operator to the `Environment`. + PrevState->Env.join(State.Env, Analysis); + return *PrevState; + } + return State; } +/// Given a `Block` with its state set in `BlockStates`, copies this state to the +/// back edge of all successors that are the first block of a loop. +/// +/// FIXME: This is unsound for a corner case where a `goto` jumps to the start +/// of a `do ... while` loop. +static void prepareBackEdges( + const CFGBlock &Block, + llvm::MutableArrayRef> + BlockStates) { + const auto &State = BlockStates[Block.getBlockID()]; + assert(State.has_value()); + + for (const auto &Succ : Block.succs()) { + if (!Succ.isReachable()) + continue; + if (const CFGBlock *BackEdge = findBackEdge(Succ); + BackEdge != nullptr && BackEdge != &Block) { + // `Succ` is the first block of the loop body, `Block` is the "entrance" + // to the loop, and `BackEdge` is the back-edge of the loop. + // + // `Block` represents the input state to the loop before any iterations + // have been executed. By copying this state to the back edge, we can + // consistently use the back edge block as the input state to the loop, + // even on its first iteration. This subsequently handles nested loops; if + // the outer loop requires additional passes to converge, the inner loop + // is correctly "reinitialized" once `Block` is transfered. + BlockStates[BackEdge->getBlockID()] = State; + } + } +} + llvm::Expected>> runTypeErasedDataflowAnalysis( const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, @@ -387,6 +459,9 @@ continue; Worklist.enqueueSuccessors(Block); + + // If we are about to enter into a loop, we set up our back edge blocks. + prepareBackEdges(*Block, BlockStates); } // FIXME: Consider evaluating unreachable basic blocks (those that have a // state set to `llvm::None` at this point) to also analyze dead code. diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -42,13 +42,17 @@ using namespace dataflow; using namespace test; using namespace ast_matchers; +using ::llvm::HasValue; using ::testing::_; +using ::testing::AllOf; +using ::testing::Each; using ::testing::ElementsAre; using ::testing::IsEmpty; -using ::testing::IsNull; using ::testing::NotNull; +using ::testing::Optional; using ::testing::Pair; using ::testing::Ref; +using ::testing::SizeIs; using ::testing::Test; using ::testing::UnorderedElementsAre; @@ -222,6 +226,71 @@ "maximum number of iterations reached"); } +struct ConvergesOnWidenLattice { + int State = 0; + bool Top = false; + + bool operator==(const ConvergesOnWidenLattice &Other) const { + if (Top) + return Other.Top; + return State == Other.State; + } + + LatticeJoinEffect join(const ConvergesOnWidenLattice &Other) { + auto Prev = *this; + Top = Top || Other.Top; + State += Other.State; + return Prev == *this ? LatticeJoinEffect::Unchanged + : LatticeJoinEffect::Changed; + } + + void widen(const ConvergesOnWidenLattice &Other) { Top = true; } + + friend std::ostream &operator<<(std::ostream &OS, + const ConvergesOnWidenLattice &L) { + return OS << "{" << L.State << "," << (L.Top ? "true" : "false") << "}"; + } +}; + +class ConvergesOnWidenAnalysis + : public DataflowAnalysis { +public: + explicit ConvergesOnWidenAnalysis(ASTContext &Context) + : DataflowAnalysis(Context, + /*ApplyBuiltinTransfer=*/false) {} + + static ConvergesOnWidenLattice initialElement() { return {}; } + + void transfer(const Stmt *S, ConvergesOnWidenLattice &E, Environment &Env) { + ++E.State; + } +}; + +TEST(DataflowAnalysisTest, WhileLoopConvergesOnWidenAnalysis) { + std::string Code = R"( + void target() { + while(true) {} + } + )"; + EXPECT_THAT_EXPECTED( + runAnalysis( + Code, [](ASTContext &C) { return ConvergesOnWidenAnalysis(C); }), + HasValue(AllOf(SizeIs(4), Each(Optional(_))))); +} + +TEST(DataflowAnalysisTest, ForLoopConvergesOnWidenAnalysis) { + std::string Code = R"( + void target() { + for (int i = 0; i > -1; ++i) {} + } + )"; + EXPECT_THAT_EXPECTED( + runAnalysis( + Code, [](ASTContext &C) { return ConvergesOnWidenAnalysis(C); }), + HasValue(AllOf(SizeIs(5), Each(Optional(_))))); +} + struct FunctionCallLattice { llvm::SmallSet CalledFunctions;