diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -42,6 +42,11 @@ /// * `void transfer(const Stmt *, LatticeT &, Environment &)` - applies the /// analysis transfer function for a given statement and lattice element. /// +/// `Derived` can optionally override the following members: +/// * `bool merge(QualType, const Value &, const Value &, Value &, +/// Environment &)` - joins distinct values. This could be a strict +/// lattice join or a more general widening operation. +/// /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must /// provide the following public members: /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -22,6 +22,7 @@ #include "llvm/ADT/DenseMap.h" #include #include +#include #include #include @@ -32,15 +33,21 @@ /// is used during dataflow analysis. class DataflowAnalysisContext { public: + DataflowAnalysisContext() + : TrueVal(&takeOwnership(std::make_unique())), + FalseVal(&takeOwnership(std::make_unique())) {} + /// Takes ownership of `Loc` and returns a reference to it. /// /// Requirements: /// /// `Loc` must not be null. - StorageLocation &takeOwnership(std::unique_ptr Loc) { + template + typename std::enable_if::value, T &>::type + takeOwnership(std::unique_ptr Loc) { assert(Loc != nullptr); Locs.push_back(std::move(Loc)); - return *Locs.back().get(); + return *cast(Locs.back().get()); } /// Takes ownership of `Val` and returns a reference to it. @@ -48,10 +55,12 @@ /// Requirements: /// /// `Val` must not be null. - Value &takeOwnership(std::unique_ptr Val) { + template + typename std::enable_if::value, T &>::type + takeOwnership(std::unique_ptr Val) { assert(Val != nullptr); Vals.push_back(std::move(Val)); - return *Vals.back().get(); + return *cast(Vals.back().get()); } /// Assigns `Loc` as the storage location of `D`. @@ -104,6 +113,12 @@ return ThisPointeeLoc; } + /// Returns a symbolic boolean value that models a boolean literal equal to + /// `Value`. + BoolValue &getBoolLiteralValue(bool Value) const { + return Value ? *TrueVal : *FalseVal; + } + private: // Storage for the state of a program. std::vector> Locs; @@ -120,6 +135,8 @@ StorageLocation *ThisPointeeLoc = nullptr; // FIXME: Add support for boolean expressions. + BoolValue *TrueVal; + BoolValue *FalseVal; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -26,6 +26,9 @@ #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include +#include +#include namespace clang { namespace dataflow { @@ -48,6 +51,25 @@ /// Holds the state of the program (store and heap) at a given program point. class Environment { public: + /// Supplements `Environment` with non-standard join operations. + class Merger { + public: + virtual ~Merger() = default; + + /// Given distinct `Val1` and `Val2`, modifies `MergedVal` to approximate + /// both `Val1` and `Val2`. This could be a strict lattice join or a more + /// general widening operation. If this function returns true, `MergedVal` + /// will be assigned to a storage location of type `Type` in `Env`. + /// + /// Requirements: + /// + /// `Val1` and `Val2` must be distinct. + virtual bool merge(QualType Type, const Value &Val1, const Value &Val2, + Value &MergedVal, Environment &Env) { + return false; + } + }; + /// Creates an environment that uses `DACtx` to store objects that encompass /// the state of a program. explicit Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx) {} @@ -64,7 +86,7 @@ bool operator==(const Environment &) const; - LatticeJoinEffect join(const Environment &); + LatticeJoinEffect join(const Environment &, Environment::Merger &); // FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`, // `getStableStorageLocation`, or something more appropriate. @@ -142,12 +164,34 @@ Value *getValue(const Expr &E, SkipPast SP) const; /// Transfers ownership of `Loc` to the analysis context and returns a - /// reference to `Loc`. - StorageLocation &takeOwnership(std::unique_ptr Loc); + /// reference to it. + /// + /// Requirements: + /// + /// `Loc` must not be null. + template + typename std::enable_if::value, T &>::type + takeOwnership(std::unique_ptr Loc) { + return DACtx->takeOwnership(std::move(Loc)); + } /// Transfers ownership of `Val` to the analysis context and returns a - /// reference to `Val`. - Value &takeOwnership(std::unique_ptr Val); + /// reference to it. + /// + /// Requirements: + /// + /// `Val` must not be null. + template + typename std::enable_if::value, T &>::type + takeOwnership(std::unique_ptr Val) { + return DACtx->takeOwnership(std::move(Val)); + } + + /// Returns a symbolic boolean value that models a boolean literal equal to + /// `Value` + BoolValue &getBoolLiteralValue(bool Value) const { + return DACtx->getBoolLiteralValue(Value); + } private: /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -40,7 +40,7 @@ }; /// Type-erased base class for dataflow analyses built on a single lattice type. -class TypeErasedDataflowAnalysis { +class TypeErasedDataflowAnalysis : public Environment::Merger { public: virtual ~TypeErasedDataflowAnalysis() {} diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h --- a/clang/include/clang/Analysis/FlowSensitive/Value.h +++ b/clang/include/clang/Analysis/FlowSensitive/Value.h @@ -17,6 +17,8 @@ #include "clang/AST/Decl.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include #include @@ -26,7 +28,7 @@ /// Base class for all values computed by abstract interpretation. class Value { public: - enum class Kind { Integer, Reference, Pointer, Struct }; + enum class Kind { Bool, Integer, Reference, Pointer, Struct }; explicit Value(Kind ValKind) : ValKind(ValKind) {} @@ -38,6 +40,14 @@ Kind ValKind; }; +/// Models a boolean. +class BoolValue : public Value { +public: + explicit BoolValue() : Value(Kind::Bool) {} + + static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; } +}; + /// Models an integer. class IntegerValue : public Value { public: @@ -110,8 +120,22 @@ /// Assigns `Val` as the child value for `D`. void setChild(const ValueDecl &D, Value &Val) { Children[&D] = &Val; } + /// Returns the value of the synthetic property with the given `Name` or null + /// if the property isn't assigned a value. + Value *getProperty(llvm::StringRef Name) const { + auto It = Properties.find(Name); + return It == Properties.end() ? nullptr : It->second; + } + + /// Assigns `Val` as the value of the synthetic property with the given + /// `Name`. + void setProperty(llvm::StringRef Name, Value &Val) { + Properties.insert_or_assign(Name, &Val); + } + private: llvm::DenseMap Children; + llvm::StringMap Properties; }; } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -73,7 +73,8 @@ return DeclToLoc == Other.DeclToLoc && LocToVal == Other.LocToVal; } -LatticeJoinEffect Environment::join(const Environment &Other) { +LatticeJoinEffect Environment::join(const Environment &Other, + Environment::Merger &Merger) { assert(DACtx == Other.DACtx); auto Effect = LatticeJoinEffect::Unchanged; @@ -88,10 +89,32 @@ if (ExprToLocSizeBefore != ExprToLoc.size()) Effect = LatticeJoinEffect::Changed; - // FIXME: Add support for joining distinct values that are assigned to the - // same storage locations in `LocToVal` and `Other.LocToVal`. + llvm::DenseMap MergedLocToVal; + for (auto &Entry : LocToVal) { + const StorageLocation *Loc = Entry.first; + assert(Loc != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = Other.LocToVal.find(Loc); + if (It == Other.LocToVal.end()) + continue; + assert(It->second != nullptr); + + if (It->second == Val) { + MergedLocToVal.insert({Loc, Val}); + continue; + } + + // FIXME: Consider destroying `MergedValue` immediately if `Merger::merge` + // returns false to avoid storing unneeded values in `DACtx`. + if (Value *MergedVal = createValue(Loc->getType())) + if (Merger.merge(Loc->getType(), *Val, *It->second, *MergedVal, *this)) + MergedLocToVal.insert({Loc, MergedVal}); + } const unsigned LocToValSizeBefore = LocToVal.size(); - LocToVal = intersectDenseMaps(LocToVal, Other.LocToVal); + LocToVal = std::move(MergedLocToVal); if (LocToValSizeBefore != LocToVal.size()) Effect = LatticeJoinEffect::Changed; @@ -267,15 +290,6 @@ return nullptr; } -StorageLocation & -Environment::takeOwnership(std::unique_ptr Loc) { - return DACtx->takeOwnership(std::move(Loc)); -} - -Value &Environment::takeOwnership(std::unique_ptr Val) { - return DACtx->takeOwnership(std::move(Val)); -} - StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { switch (SP) { case SkipPast::None: diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -93,7 +93,7 @@ MaybePredState.getValue(); if (MaybeState.hasValue()) { Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); - MaybeState->Env.join(PredState.Env); + MaybeState->Env.join(PredState.Env, Analysis); } else { MaybeState = PredState; } diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -165,6 +165,13 @@ VirtualMappedFiles); } +/// Returns the `ValueDecl` for the given identifier. +/// +/// Requirements: +/// +/// `Name` must be unique in `ASTCtx`. +const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name); + } // namespace test } // namespace dataflow } // namespace clang diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp @@ -18,8 +18,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Support/Annotations.h" +#include #include #include #include @@ -29,6 +31,7 @@ using namespace clang; using namespace dataflow; +using namespace ast_matchers; static bool isAnnotationDirectlyAfterStatement(const Stmt *Stmt, unsigned AnnotationBegin, @@ -55,7 +58,6 @@ llvm::Annotations AnnotatedCode) { llvm::DenseMap Result; - using namespace ast_matchers; // NOLINT: Too many names auto StmtMatcher = findAll(stmt(unless(anyOf(hasParent(expr()), hasParent(returnStmt())))) .bind("stmt")); @@ -121,3 +123,11 @@ return Result; } + +const ValueDecl *test::findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name) { + auto TargetNodes = match(valueDecl(hasName(Name)).bind("v"), ASTCtx); + assert(TargetNodes.size() == 1 && "Name must be unique"); + auto *const Result = selectFirst("v", TargetNodes); + assert(Result != nullptr); + return Result; +} diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -22,7 +22,6 @@ #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include #include #include @@ -30,6 +29,7 @@ using namespace clang; using namespace dataflow; +using namespace test; using ::testing::_; using ::testing::ElementsAre; using ::testing::IsNull; @@ -58,21 +58,6 @@ } }; -/// Returns the `ValueDecl` for the given identifier. -/// -/// Requirements: -/// -/// `Name` must be unique in `ASTCtx`. -static const ValueDecl *findValueDecl(ASTContext &ASTCtx, - llvm::StringRef Name) { - auto TargetNodes = ast_matchers::match( - ast_matchers::valueDecl(ast_matchers::hasName(Name)).bind("v"), ASTCtx); - assert(TargetNodes.size() == 1 && "Name must be unique"); - auto *const Result = ast_matchers::selectFirst("v", TargetNodes); - assert(Result != nullptr); - return Result; -} - TEST_F(TransferTest, IntVarDecl) { std::string Code = R"( void target() { diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -9,6 +9,7 @@ #include "NoopAnalysis.h" #include "TestingSupport.h" #include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Analysis/CFG.h" @@ -16,6 +17,7 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -35,8 +37,15 @@ using namespace clang; using namespace dataflow; +using namespace test; +using namespace ast_matchers; +using ::testing::_; +using ::testing::ElementsAre; using ::testing::IsEmpty; +using ::testing::IsNull; +using ::testing::NotNull; using ::testing::Pair; +using ::testing::Test; using ::testing::UnorderedElementsAre; template @@ -174,7 +183,7 @@ } }; -class NoreturnDestructorTest : public ::testing::Test { +class NoreturnDestructorTest : public Test { protected: template void runDataflow(llvm::StringRef Code, Matcher Expectations) { @@ -300,4 +309,184 @@ // FIXME: Called functions at point `p` should contain only "foo". } +class OptionalIntAnalysis + : public DataflowAnalysis { +public: + explicit OptionalIntAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) {} + + static NoopLattice initialElement() { return {}; } + + void transfer(const Stmt *S, NoopLattice &, Environment &Env) { + auto OptionalIntRecordDecl = recordDecl(hasName("OptionalInt")); + auto HasOptionalIntType = hasType(OptionalIntRecordDecl); + + if (const auto *E = selectFirst( + "call", match(cxxConstructExpr(HasOptionalIntType).bind("call"), *S, + getASTContext()))) { + auto &ConstructorVal = *cast(Env.createValue(E->getType())); + ConstructorVal.setProperty("has_value", Env.getBoolLiteralValue(false)); + Env.setValue(*Env.getStorageLocation(*E, SkipPast::None), ConstructorVal); + } else if (const auto *E = selectFirst( + "call", + match(cxxOperatorCallExpr(callee(cxxMethodDecl(ofClass( + OptionalIntRecordDecl)))) + .bind("call"), + *S, getASTContext()))) { + assert(E->getNumArgs() > 0); + auto *Object = E->getArg(0); + assert(Object != nullptr); + + auto *ObjectLoc = + Env.getStorageLocation(*Object, SkipPast::ReferenceThenPointer); + assert(ObjectLoc != nullptr); + + auto &ConstructorVal = + *cast(Env.createValue(Object->getType())); + ConstructorVal.setProperty("has_value", Env.getBoolLiteralValue(true)); + Env.setValue(*ObjectLoc, ConstructorVal); + } + } + + bool merge(QualType Type, const Value &Val1, const Value &Val2, + Value &MergedVal, Environment &Env) final { + if (!Type->isRecordType() || + Type->getAsCXXRecordDecl()->getQualifiedNameAsString() != "OptionalInt") + return false; + + auto *HasValue1 = cast_or_null( + cast(&Val1)->getProperty("has_value")); + if (HasValue1 == nullptr) + return false; + + auto *HasValue2 = cast_or_null( + cast(&Val2)->getProperty("has_value")); + if (HasValue2 == nullptr) + return false; + + if (HasValue1 != HasValue2) + return false; + + cast(&MergedVal)->setProperty("has_value", *HasValue1); + return true; + } +}; + +class WideningTest : public Test { +protected: + template + void runDataflow(llvm::StringRef Code, Matcher Match) { + tooling::FileContentMappings FilesContents; + FilesContents.push_back( + std::make_pair("widening_test_defs.h", R"( + struct OptionalInt { + OptionalInt() = default; + OptionalInt& operator=(int); + }; + )")); + ASSERT_THAT_ERROR( + test::checkDataflow( + Code, "target", + [](ASTContext &Context, Environment &Env) { + return OptionalIntAnalysis(Context); + }, + [&Match]( + llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { Match(Results, ASTCtx); }, + {"-fsyntax-only", "-std=c++17"}, FilesContents), + llvm::Succeeded()); + } +}; + +TEST_F(WideningTest, JoinDistinctValuesWithDistinctProperties) { + std::string Code = R"( + #include "widening_test_defs.h" + + void target(bool Cond) { + OptionalInt Foo; + /*[[p1]]*/ + if (Cond) { + Foo = 1; + /*[[p2]]*/ + } + (void)0; + /*[[p3]]*/ + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, + ElementsAre(Pair("p3", _), Pair("p2", _), Pair("p1", _))); + const Environment &Env1 = Results[2].second.Env; + const Environment &Env2 = Results[1].second.Env; + const Environment &Env3 = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto GetFooValue = [FooDecl](const Environment &Env) { + return cast(Env.getValue(*FooDecl, SkipPast::None)); + }; + + EXPECT_EQ(GetFooValue(Env1)->getProperty("has_value"), + &Env1.getBoolLiteralValue(false)); + EXPECT_EQ(GetFooValue(Env2)->getProperty("has_value"), + &Env2.getBoolLiteralValue(true)); + EXPECT_THAT(Env3.getValue(*FooDecl, SkipPast::None), IsNull()); + }); +} + +TEST_F(WideningTest, JoinDistinctValuesWithSameProperties) { + std::string Code = R"( + #include "widening_test_defs.h" + + void target(bool Cond) { + OptionalInt Foo; + /*[[p1]]*/ + if (Cond) { + Foo = 1; + /*[[p2]]*/ + } else { + Foo = 2; + /*[[p3]]*/ + } + (void)0; + /*[[p4]]*/ + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p4", _), Pair("p3", _), + Pair("p2", _), Pair("p1", _))); + const Environment &Env1 = Results[3].second.Env; + const Environment &Env2 = Results[2].second.Env; + const Environment &Env3 = Results[1].second.Env; + const Environment &Env4 = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto GetFooValue = [FooDecl](const Environment &Env) { + return cast(Env.getValue(*FooDecl, SkipPast::None)); + }; + + EXPECT_EQ(GetFooValue(Env1)->getProperty("has_value"), + &Env1.getBoolLiteralValue(false)); + EXPECT_EQ(GetFooValue(Env2)->getProperty("has_value"), + &Env2.getBoolLiteralValue(true)); + EXPECT_EQ(GetFooValue(Env3)->getProperty("has_value"), + &Env3.getBoolLiteralValue(true)); + EXPECT_EQ(GetFooValue(Env4)->getProperty("has_value"), + &Env4.getBoolLiteralValue(true)); + }); +} + } // namespace