diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -192,7 +192,8 @@ /// Moves gathered information back into `this` from a `CalleeEnv` created via /// `pushCall`. - void popCall(const Environment &CalleeEnv); + void popCall(const CallExpr *Call, const Environment &CalleeEnv); + void popCall(const CXXConstructExpr *Call, const Environment &CalleeEnv); /// Returns true if and only if the environment is equivalent to `Other`, i.e /// the two environments: @@ -323,8 +324,51 @@ /// in the environment. StorageLocation *getThisPointeeStorageLocation() const; - /// Returns the storage location of the return value or null, if unset. - StorageLocation *getReturnStorageLocation() const; + /// Returns the return value of the current function. This can be null if: + /// - The function has a void return type + /// - No return value could be determined for the function, for example + /// because it calls a function without a body. + /// + /// Requirements: + /// The current function must have a non-reference return type. + Value *getReturnValue() const { + assert(getCurrentFunc() != nullptr && + !getCurrentFunc()->getReturnType()->isReferenceType()); + return ReturnVal; + } + + /// Returns the storage location for the reference returned by the current + /// function. This can be null if function doesn't return a single consistent + /// reference. + /// + /// Requirements: + /// The current function must have a reference return type. + StorageLocation *getReturnStorageLocation() const { + assert(getCurrentFunc() != nullptr && + getCurrentFunc()->getReturnType()->isReferenceType()); + return ReturnLoc; + } + + /// Sets the return value of the current function. + /// + /// Requirements: + /// The current function must have a non-reference return type. + void setReturnValue(Value *Val) { + assert(getCurrentFunc() != nullptr && + !getCurrentFunc()->getReturnType()->isReferenceType()); + ReturnVal = Val; + } + + /// Sets the storage location for the reference returned by the current + /// function. + /// + /// Requirements: + /// The current function must have a reference return type. + void setReturnStorageLocation(StorageLocation *Loc) { + assert(getCurrentFunc() != nullptr && + getCurrentFunc()->getReturnType()->isReferenceType()); + ReturnLoc = Loc; + } /// Returns a pointer value that represents a null pointer. Calls with /// `PointeeType` that are canonically equivalent will return the same result. @@ -472,6 +516,12 @@ /// returns null. const DeclContext *getDeclCtx() const { return CallStack.back(); } + /// Returns the function currently being analyzed, or null if the code being + /// analyzed isn't part of a function. + const FunctionDecl *getCurrentFunc() const { + return dyn_cast(getDeclCtx()); + } + /// Returns whether this `Environment` can be extended to analyze the given /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a /// given `MaxDepth`. @@ -515,16 +565,18 @@ // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; - - // FIXME: move the fields `CallStack`, `ReturnLoc` and `ThisPointeeLoc` into a - // separate call-context object, shared between environments in the same call. + // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and + // `ThisPointeeLoc` into a separate call-context object, shared between + // environments in the same call. // https://github.com/llvm/llvm-project/issues/59005 // `DeclContext` of the block being analysed if provided. std::vector CallStack; - // In a properly initialized `Environment`, `ReturnLoc` should only be null if - // its `DeclContext` could not be cast to a `FunctionDecl`. + // Value returned by the function (if it has non-reference return type). + Value *ReturnVal = nullptr; + // Storage location of the reference returned by the function (if it has + // reference return type). StorageLocation *ReturnLoc = nullptr; // The storage location of the `this` pointee. Should only be null if the // function being analyzed is only a function and not a method. diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -267,9 +267,10 @@ Environment::Environment(const Environment &Other) : DACtx(Other.DACtx), CallStack(Other.CallStack), - ReturnLoc(Other.ReturnLoc), ThisPointeeLoc(Other.ThisPointeeLoc), - DeclToLoc(Other.DeclToLoc), ExprToLoc(Other.ExprToLoc), - LocToVal(Other.LocToVal), MemberLocToStruct(Other.MemberLocToStruct), + ReturnVal(Other.ReturnVal), ReturnLoc(Other.ReturnLoc), + ThisPointeeLoc(Other.ThisPointeeLoc), DeclToLoc(Other.DeclToLoc), + ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal), + MemberLocToStruct(Other.MemberLocToStruct), FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) { } @@ -302,9 +303,6 @@ createValue(ParamDecl->getType().getNonReferenceType())) setValue(ParamLoc, *ParamVal); } - - QualType ReturnType = FuncDecl->getReturnType(); - ReturnLoc = &createStorageLocation(ReturnType); } if (const auto *MethodDecl = dyn_cast(&DeclCtx)) { @@ -331,9 +329,6 @@ Environment Environment::pushCall(const CallExpr *Call) const { Environment Env(*this); - // FIXME: Support references here. - Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); - if (const auto *MethodCall = dyn_cast(Call)) { if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { if (!isa(Arg)) @@ -352,10 +347,9 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); - // FIXME: Support references here. - Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); - - Env.ThisPointeeLoc = Env.ReturnLoc; + Env.ThisPointeeLoc = &Env.createStorageLocation(Call->getType()); + if (Value *Val = Env.createValue(Call->getType())) + Env.setValue(*Env.ThisPointeeLoc, *Val); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -365,6 +359,12 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef Args) { + // Canonicalize to the definition of the function. This ensures that we're + // putting arguments into the same `ParamVarDecl`s` that the callee will later + // be retrieving them from. + assert(FuncDecl->getDefinition() != nullptr); + FuncDecl = FuncDecl->getDefinition(); + CallStack.push_back(FuncDecl); initFieldsGlobalsAndFuncs(FuncDecl); @@ -399,23 +399,46 @@ } } -void Environment::popCall(const Environment &CalleeEnv) { +void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { // We ignore `DACtx` because it's already the same in both. We don't want the - // callee's `DeclCtx`, `ReturnLoc` or `ThisPointeeLoc`. We don't bring back - // `DeclToLoc` and `ExprToLoc` because we want to be able to later analyze the - // same callee in a different context, and `setStorageLocation` requires there - // to not already be a storage location assigned. Conceptually, these maps - // capture information from the local scope, so when popping that scope, we do - // not propagate the maps. + // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't + // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later + // analyze the same callee in a different context, and `setStorageLocation` + // requires there to not already be a storage location assigned. Conceptually, + // these maps capture information from the local scope, so when popping that + // scope, we do not propagate the maps. this->LocToVal = std::move(CalleeEnv.LocToVal); this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); + + if (Call->isGLValue()) { + if (CalleeEnv.ReturnLoc != nullptr) + setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc); + } else if (!Call->getType()->isVoidType()) { + if (CalleeEnv.ReturnVal != nullptr) + setValueStrict(*Call, *CalleeEnv.ReturnVal); + } +} + +void Environment::popCall(const CXXConstructExpr *Call, + const Environment &CalleeEnv) { + // See also comment in `popCall(const CallExpr *, const Environment &)` above. + this->LocToVal = std::move(CalleeEnv.LocToVal); + this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); + this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); + + if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { + setValueStrict(*Call, *Val); + } } bool Environment::equivalentTo(const Environment &Other, Environment::ValueModel &Model) const { assert(DACtx == Other.DACtx); + if (ReturnVal != Other.ReturnVal) + return false; + if (ReturnLoc != Other.ReturnLoc) return false; @@ -453,6 +476,7 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, Environment::ValueModel &Model) { assert(DACtx == PrevEnv.DACtx); + assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); @@ -514,7 +538,6 @@ LatticeJoinEffect Environment::join(const Environment &Other, Environment::ValueModel &Model) { assert(DACtx == Other.DACtx); - assert(ReturnLoc == Other.ReturnLoc); assert(ThisPointeeLoc == Other.ThisPointeeLoc); assert(CallStack == Other.CallStack); @@ -523,9 +546,38 @@ Environment JoinedEnv(*DACtx); JoinedEnv.CallStack = CallStack; - JoinedEnv.ReturnLoc = ReturnLoc; JoinedEnv.ThisPointeeLoc = ThisPointeeLoc; + if (ReturnVal == nullptr || Other.ReturnVal == nullptr) { + // `ReturnVal` might not always get set -- for example if we have a return + // statement of the form `return some_other_func()` and we decide not to + // analyze `some_other_func()`. + // In this case, we can't say anything about the joined return value -- we + // don't simply want to propagate the return value that we do have, because + // it might not be the correct one. + // This occurs for example in the test `ContextSensitiveMutualRecursion`. + JoinedEnv.ReturnVal = nullptr; + } else if (areEquivalentValues(*ReturnVal, *Other.ReturnVal)) { + JoinedEnv.ReturnVal = ReturnVal; + } else { + assert(!CallStack.empty()); + // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this + // cast. + auto *Func = dyn_cast(CallStack.back()); + assert(Func != nullptr); + if (Value *MergedVal = + mergeDistinctValues(Func->getReturnType(), *ReturnVal, *this, + *Other.ReturnVal, Other, JoinedEnv, Model)) { + JoinedEnv.ReturnVal = MergedVal; + Effect = LatticeJoinEffect::Changed; + } + } + + if (ReturnLoc == Other.ReturnLoc) + JoinedEnv.ReturnLoc = ReturnLoc; + else + JoinedEnv.ReturnLoc = nullptr; + // FIXME: Once we're able to remove declarations from `DeclToLoc` when their // lifetime ends, add an assertion that there aren't any entries in // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to @@ -659,10 +711,6 @@ return ThisPointeeLoc; } -StorageLocation *Environment::getReturnStorageLocation() const { - return ReturnLoc; -} - PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -503,22 +503,21 @@ if (Ret == nullptr) return; - auto *Val = Env.getValue(*Ret, SkipPast::None); - if (Val == nullptr) - return; - - // FIXME: Support reference-type returns. - if (Val->getKind() == Value::Kind::Reference) - return; + if (Ret->isPRValue()) { + auto *Val = Env.getValueStrict(*Ret); + if (Val == nullptr) + return; - auto *Loc = Env.getReturnStorageLocation(); - assert(Loc != nullptr); - // FIXME: Support reference-type returns. - if (Loc->getType()->isReferenceType()) - return; + // FIXME: Model NRVO. + Env.setReturnValue(Val); + } else { + auto *Loc = Env.getStorageLocationStrict(*Ret); + if (Loc == nullptr) + return; - // FIXME: Model NRVO. - Env.setValue(*Loc, *Val); + // FIXME: Model NRVO. + Env.setReturnStorageLocation(Loc); + } } void VisitMemberExpr(const MemberExpr *S) { @@ -857,13 +856,6 @@ auto ExitBlock = CFCtx->getCFG().getExit().getBlockID(); - if (const auto *NonConstructExpr = dyn_cast(S)) { - // Note that it is important for the storage location of `S` to be set - // before `pushCall`, because the latter uses it to set the storage - // location for `return`. - auto &ReturnLoc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, ReturnLoc); - } auto CalleeEnv = Env.pushCall(S); // FIXME: Use the same analysis as the caller for the callee. Note, @@ -882,7 +874,7 @@ auto ExitState = (*BlockToOutputState)[ExitBlock]; assert(ExitState); - Env.popCall(ExitState->Env); + Env.popCall(S, ExitState->Env); } const StmtToEnvMap &StmtToEnv; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -32,7 +32,9 @@ using namespace clang; using namespace dataflow; using namespace test; +using ::testing::Eq; using ::testing::IsNull; +using ::testing::Ne; using ::testing::NotNull; using ::testing::UnorderedElementsAre; @@ -4239,13 +4241,33 @@ {BuiltinOptions{/*.ContextSensitiveOpts=*/std::nullopt}}); } +TEST(TransferTest, ContextSensitiveReturnReference) { + std::string Code = R"( + class S {}; + S& target(bool b, S &s) { + return s; + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + const ValueDecl *SDecl = findValueDecl(ASTCtx, "s"); + ASSERT_THAT(SDecl, NotNull()); + + auto *SLoc = Env.getStorageLocation(*SDecl); + ASSERT_THAT(SLoc, NotNull()); + + ASSERT_THAT(Env.getReturnStorageLocation(), Eq(SLoc)); + }, + {BuiltinOptions{ContextSensitiveOptions{}}}); +} + // This test is a regression test, based on a real crash. -TEST(TransferTest, ContextSensitiveReturnReferenceFromNonReferenceLvalue) { - // This code exercises an unusual code path. If we return an lvalue directly, - // the code will catch that it's an l-value based on the `Value`'s kind. If we - // pass through a dummy function, the framework won't populate a value at - // all. In contrast, this code results in a (fresh) value, but it is not - // `ReferenceValue`. This test verifies that we catch this case as well. +TEST(TransferTest, ContextSensitiveReturnReferenceWithConditionalOperator) { std::string Code = R"( class S {}; S& target(bool b, S &s) { @@ -4260,10 +4282,72 @@ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + const ValueDecl *SDecl = findValueDecl(ASTCtx, "s"); + ASSERT_THAT(SDecl, NotNull()); + + auto *SLoc = Env.getStorageLocation(*SDecl); + ASSERT_THAT(SLoc, NotNull()); + EXPECT_THAT(Env.getValue(*SLoc), NotNull()); + auto *Loc = Env.getReturnStorageLocation(); ASSERT_THAT(Loc, NotNull()); + EXPECT_THAT(Env.getValue(*Loc), NotNull()); + + // TODO: We would really like to make this stronger assertion, but that + // doesn't work because we don't propagate values correctly through + // the conditional operator yet. + // ASSERT_THAT(Loc, Eq(SLoc)); + }, + {BuiltinOptions{ContextSensitiveOptions{}}}); +} + +TEST(TransferTest, ContextSensitiveReturnOneOfTwoReferences) { + std::string Code = R"( + class S {}; + S &callee(bool b, S &s1_parm, S &s2_parm) { + if (b) + return s1_parm; + else + return s2_parm; + } + void target(bool b) { + S s1; + S s2; + S &return_s1 = s1; + S &return_s2 = s2; + S &return_dont_know = callee(b, s1, s2); + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - EXPECT_THAT(Env.getValue(*Loc), IsNull()); + const ValueDecl *S1 = findValueDecl(ASTCtx, "s1"); + ASSERT_THAT(S1, NotNull()); + const ValueDecl *S2 = findValueDecl(ASTCtx, "s2"); + ASSERT_THAT(S2, NotNull()); + const ValueDecl *ReturnS1 = findValueDecl(ASTCtx, "return_s1"); + ASSERT_THAT(ReturnS1, NotNull()); + const ValueDecl *ReturnS2 = findValueDecl(ASTCtx, "return_s2"); + ASSERT_THAT(ReturnS2, NotNull()); + const ValueDecl *ReturnDontKnow = + findValueDecl(ASTCtx, "return_dont_know"); + ASSERT_THAT(ReturnDontKnow, NotNull()); + + StorageLocation *S1Loc = Env.getStorageLocation(*S1); + StorageLocation *S2Loc = Env.getStorageLocation(*S2); + + EXPECT_THAT(Env.getStorageLocation(*ReturnS1), Eq(S1Loc)); + EXPECT_THAT(Env.getStorageLocation(*ReturnS2), Eq(S2Loc)); + + // In the case where we don't have a consistent storage location for + // the return value, the framework creates a new storage location, which + // should be different from the storage locations of `s1` and `s2`. + EXPECT_THAT(Env.getStorageLocation(*ReturnDontKnow), Ne(S1Loc)); + EXPECT_THAT(Env.getStorageLocation(*ReturnDontKnow), Ne(S2Loc)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); }