Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -138,6 +138,9 @@ bool checkPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const; + /// Add taint sources for operator>> on pre-visit. + static bool addOverloadedOpPre(const CallEvent &Call, CheckerContext &C); + /// Add taint sources on a pre-visit. Returns true on matching. bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const; @@ -154,6 +157,9 @@ /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// Check if the type of the variable is std::basic_istream + static bool isStdstream(const Expr *E, CheckerContext &C); + /// Given a pointer argument, return the value it points to. static Optional getPointeeOf(CheckerContext &C, const Expr *Arg); @@ -258,15 +264,12 @@ return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); } - static bool isTaintedOrPointsToTainted(const Expr *E, - const ProgramStateRef &State, + static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C) || + isStdstream(E, C)) return true; - if (!E->getType().getTypePtr()->isPointerType()) - return false; - Optional V = getPointeeOf(C, E); return (V && isTainted(State, *V)); } @@ -282,7 +285,12 @@ /// Defines a map between the propagation function's name, scope /// and TaintPropagationRule. - NameRuleMap CustomPropagations; + NameRuleMap CustomPropagations{ + {"c_str", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}}, + {"data", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}}, + {"size", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}}, + {"length", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}}, + {"getline", {"std::", {{0}, {1, ReturnValueIndex}}}}}; /// Defines a map between the filter function's name, scope and filtering /// args. @@ -354,6 +362,19 @@ /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +/// Treat implicit this parameter as the 0. argument. +const Expr *getArgWithImplicitThis(const CallEvent &Call, unsigned ArgNum) { + if (const auto *InstanceCall = dyn_cast(&Call)) + return ArgNum == 0 ? InstanceCall->getCXXThisExpr() + : InstanceCall->getArgExpr(ArgNum - 1); + return Call.getArgExpr(ArgNum); +} + +/// Class member functions has an implicit this parameter. +unsigned getNumArgsWithImplicitThis(const CallEvent &Call) { + return Call.getNumArgs() + static_cast(isa(Call)); +} + GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, const std::string &Option, @@ -521,6 +542,9 @@ void GenericTaintChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + if (addOverloadedOpPre(Call, C)) + return; + Optional FData = FunctionData::create(Call, C); if (!FData) return; @@ -550,6 +574,31 @@ printTaint(State, Out, NL, Sep); } +bool GenericTaintChecker::addOverloadedOpPre(const CallEvent &Call, + CheckerContext &C) { + const auto *OCE = dyn_cast(Call.getOriginExpr()); + if (OCE) { + TaintPropagationRule Rule; + switch (OCE->getOperator()) { + case OO_GreaterGreater: + Rule = {{0}, {1, ReturnValueIndex}}; + break; + case OO_Equal: + Rule = {{1}, {0}}; + break; + default: + return false; + } + + ProgramStateRef State = Rule.process(Call, C); + if (State) { + C.addTransition(State); + return true; + } + } + return false; +} + bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const { @@ -577,10 +626,10 @@ const auto &Value = It->second; const ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - const Expr *Arg = Call.getArgExpr(ArgNum); + const Expr *Arg = getArgWithImplicitThis(Call, ArgNum); Optional V = getPointeeOf(C, Arg); if (V) State = removeTaint(State, *V); @@ -613,9 +662,9 @@ // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (Call.getNumArgs() < (ArgNum + 1)) + if (getNumArgsWithImplicitThis(Call) < (ArgNum + 1)) return false; - const Expr *Arg = Call.getArgExpr(ArgNum); + const Expr *Arg = getArgWithImplicitThis(Call, ArgNum); Optional V = getPointeeOf(C, Arg); if (V) State = addTaint(State, *V); @@ -679,20 +728,21 @@ // Check for taint in arguments. bool IsTainted = true; for (unsigned ArgNum : SrcArgs) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - if ((IsTainted = - isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) + if ((IsTainted = isTaintedOrPointsToTainted( + getArgWithImplicitThis(Call, ArgNum), State, C))) break; } // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { - if ((IsTainted = - isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) + for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call); + ++i) { + if ((IsTainted = isTaintedOrPointsToTainted( + getArgWithImplicitThis(Call, i), State, C))) break; } } @@ -711,7 +761,7 @@ continue; } - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; // Mark the given argument. @@ -724,8 +774,9 @@ // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { - const Expr *Arg = Call.getArgExpr(i); + for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call); + ++i) { + const Expr *Arg = getArgWithImplicitThis(Call, i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); QualType PType = ArgTy->getPointeeType(); @@ -742,7 +793,7 @@ // If argument 0(protocol domain) is network, the return value should get taint. bool GenericTaintChecker::TaintPropagationRule::postSocket( bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { - SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); + SourceLocation DomLoc = getArgWithImplicitThis(Call, 0)->getExprLoc(); StringRef DomName = C.getMacroNameOrSpelling(DomLoc); // White list the internal communication protocols. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || @@ -751,6 +802,11 @@ return true; } +bool GenericTaintChecker::isStdstream(const Expr *E, CheckerContext &C) { + const std::string CT = E->getType().getCanonicalType().getAsString(); + return StringRef(CT).find("std::basic_istream") != StringRef::npos; +} + bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { ProgramStateRef State = C.getState(); SVal Val = C.getSVal(E); @@ -797,7 +853,7 @@ for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; if ((Format->getType()->getName() == "printf") && - Call.getNumArgs() > ArgNum) + getNumArgsWithImplicitThis(Call) > ArgNum) return true; } @@ -846,7 +902,7 @@ // If either the format string content or the pointer itself are tainted, // warn. - return generateReportIfTainted(Call.getArgExpr(ArgNum), + return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), MsgUncontrolledFormatString, C); } @@ -868,11 +924,12 @@ .Case("dlopen", 0) .Default(InvalidArgIndex); - if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) + if (ArgNum == InvalidArgIndex || + getNumArgsWithImplicitThis(Call) < (ArgNum + 1)) return false; - return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, - C); + return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgSanitizeSystemArgs, C); } // TODO: Should this check be a part of the CString checker? @@ -912,9 +969,10 @@ ArgNum = 2; } - return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && - generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, - C); + return ArgNum != InvalidArgIndex && + getNumArgsWithImplicitThis(Call) > ArgNum && + generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgTaintedBufferSize, C); } bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, @@ -927,10 +985,11 @@ const auto &Value = It->second; const GenericTaintChecker::ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) + if (generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgCustomSink, C)) return true; } Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -152,6 +152,14 @@ return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); + if (auto LCV = V.getAs()) { + if (Optional binding = + State->getStateManager().getStoreManager().getDefaultBinding( + *LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return isTainted(State, Sym, Kind); + } + } return false; } Index: clang/test/Analysis/taint-generic.cpp =================================================================== --- clang/test/Analysis/taint-generic.cpp +++ clang/test/Analysis/taint-generic.cpp @@ -124,3 +124,127 @@ foo.myMemberScanf("%d", &x); Buffer[x] = 1; // expected-warning {{Out of bound memory access }} } + +// Test I/O +namespace std { +template +class char_traits {}; + +class ios_base {}; + +template > +class basic_ios : public ios_base {}; + +template > +class basic_istream : virtual public basic_ios {}; +template > +class basic_ostream : virtual public basic_ios {}; + +using istream = basic_istream; +using ostream = basic_ostream; +using wistream = basic_istream; + +istream &operator>>(istream &is, int &val); +wistream &operator>>(wistream &is, int &val); + +extern istream cin; +extern istream wcin; + +template > +class basic_fstream : public basic_istream, public basic_ostream { +public: + basic_fstream(const char *) {} +}; +template > +class basic_ifstream : public basic_istream { +public: + basic_ifstream(const char *) {} +}; + +using ifstream = basic_ifstream; +using fstream = basic_ifstream; + +template +class allocator {}; + +namespace __cxx11 { +template < + class CharT, + class Traits = std::char_traits, + class Allocator = std::allocator> +class basic_string { +public: + const char *c_str(); + basic_string operator=(const basic_string &); + +private: + unsigned size; + char *ptr; +}; +} // namespace __cxx11 + +using string = __cxx11::basic_string; + +istream &operator>>(istream &is, string &val); +istream &getline(istream &is, string &str); +} // namespace std + +void testCin() { + int x, y; + std::cin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void testWcin() { + int x, y; + std::wcin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void mySink(const std::string &, int, const char *); + +void testFstream() { + std::string str; + std::ifstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testIfstream() { + std::string str; + std::fstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testStdstring() { + std::string str1; + std::cin >> str1; + + std::string &str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} + + const std::string &str3 = str1; + mySink(str3, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testTaintedThis() { + std::string str; + mySink(std::string(), 0, str.c_str()); // no-warning + + std::cin >> str; + mySink(std::string(), 0, str.c_str()); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testOverloadedAssignmentOp() { + std::string str1, str2; + std::cin >> str1; + str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testGetline() { + std::string str; + std::getline(std::cin, str); + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +}