Index: include/clang/StaticAnalyzer/Core/PathSensitive/Store.h =================================================================== --- include/clang/StaticAnalyzer/Core/PathSensitive/Store.h +++ include/clang/StaticAnalyzer/Core/PathSensitive/Store.h @@ -59,6 +59,25 @@ /// \return The value bound to the location \c loc. virtual SVal getBinding(Store store, Loc loc, QualType T = QualType()) = 0; + /// Return the default value bound to a region in a given store. The default + /// binding is the value assigned to uninitialized sub-regions of compound + /// values. This could be an unknown or symbolic value. + /// \param[in] store The analysis state. + /// \param[in] R The default binding is returned for this region. + /// \return The default value bound to the region in the store, if a default + /// binding exists. + virtual Optional getDefaultBinding(Store store, const MemRegion *R) = 0; + + /// Return the default value bound to a LazyCompoundVal. The default binding + /// is used to represent the value of any uninitialized fields or elements + /// within the LazyCompoundVal. This could be an unknown or symbolic value. + /// \param[in] lcv The lazy compound value. + /// \return The default value bound to the LazyCompoundVal \c lcv, if a + /// default binding exists. + Optional getDefaultBinding(nonloc::LazyCompoundVal lcv) { + return getDefaultBinding(lcv.getStore(), lcv.getRegion()); + } + /// Return a state with the specified value bound to the given location. /// \param[in] store The analysis state. /// \param[in] loc The symbolic memory location. Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -65,6 +65,18 @@ /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// This is called from getPointedToSymbol() to resolve symbol references for + /// the region underlying a LazyCompoundVal. This is the default binding + /// for the LCV, which could be a conjured symbol from a function call that + /// initialized the region. It only returns the conjured symbol if the LCV + /// covers the entire region, e.g. we avoid false positives by not returning + /// a default bindingc for an entire struct if the symbol for only a single + /// field or element within it is requested. + // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so + // that they are also appropriately tainted. + static SymbolRef getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV); + /// \brief Given a pointer argument, get the symbol of the value it contains /// (points to). static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); @@ -423,6 +435,27 @@ return false; } +SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV) { + StoreManager &StoreMgr = C.getStoreManager(); + + // getLCVSymbol() is reached in a PostStmt so we can always expect a default + // binding to exist if one is present. + if (Optional binding = StoreMgr.getDefaultBinding(LCV)) { + SymbolRef Sym = binding->getAsSymbol(); + if (!Sym) + return nullptr; + + // If the LCV covers an entire base region return the default conjured symbol. + if (LCV.getRegion() == LCV.getRegion()->getBaseRegion()) + return Sym; + } + + // Otherwise, return a nullptr as there's not yet a functional way to taint + // sub-regions of LCVs. + return nullptr; +} + SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, const Expr* Arg) { ProgramStateRef State = C.getState(); @@ -438,6 +471,10 @@ dyn_cast(Arg->getType().getCanonicalType().getTypePtr()); SVal Val = State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType()); + + if (auto LCV = Val.getAs()) + return getLCVSymbol(C, *LCV); + return Val.getAsSymbol(); } Index: lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- lib/StaticAnalyzer/Core/RegionStore.cpp +++ lib/StaticAnalyzer/Core/RegionStore.cpp @@ -494,6 +494,11 @@ return getBinding(getRegionBindings(S), L, T); } + Optional getDefaultBinding(Store S, const MemRegion *R) override { + RegionBindingsRef B = getRegionBindings(S); + return B.getDefaultBinding(R); + } + SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType()); SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R); Index: test/Analysis/taint-generic.c =================================================================== --- test/Analysis/taint-generic.c +++ test/Analysis/taint-generic.c @@ -169,6 +169,43 @@ sock = socket(AF_LOCAL, SOCK_STREAM, 0); read(sock, buffer, 100); execl(buffer, "filename", 0); // no-warning + + sock = socket(AF_INET, SOCK_STREAM, 0); + // References to both buffer and &buffer as an argument should taint the argument + read(sock, &buffer, 100); + execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed to a system call}} +} + +void testStruct() { + struct { + char buf[16]; + int length; + } tainted; + + char buffer[16]; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, 0); + read(sock, &tainted, sizeof(tainted)); + __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}} +} + +void testStructArray() { + struct { + char buf[16]; + struct { + int length; + } st[1]; + } tainted; + + char buffer[16]; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, 0); + read(sock, &tainted.buf[0], sizeof(tainted.buf)); + read(sock, &tainted.st[0], sizeof(tainted.st)); + // FIXME: tainted.st[0].length should be marked tainted + __builtin_memcpy(buffer, tainted.buf, tainted.st[0].length); // no-warning } int testDivByZero() {