diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -24,6 +24,7 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/YAMLTraits.h" #include #include @@ -135,6 +136,9 @@ bool checkPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; + /// Add taint sources for extraction operator on pre-visit. + bool addOverloadedOpPre(const CallExpr *CE, CheckerContext &C) const; + /// Add taint sources on a pre-visit. Returns true on matching. bool addSourcesPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const; @@ -151,6 +155,9 @@ /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// Check if the type of the variable is std::basic_istream + static bool isStdstream(const Expr *E, CheckerContext &C); + /// Given a pointer argument, return the value it points to. static Optional getPointedToSVal(CheckerContext &C, const Expr *Arg); @@ -258,12 +265,10 @@ static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C) || + isStdstream(E, C)) return true; - if (!E->getType().getTypePtr()->isPointerType()) - return false; - Optional V = getPointedToSVal(C, E); return (V && isTainted(State, *V)); } @@ -279,7 +284,20 @@ /// Defines a map between the propagation function's name, scope /// and TaintPropagationRule. - NameRuleMap CustomPropagations; + NameRuleMap CustomPropagations{ + {"c_str", + {"std::__cxx11::basic_string", + TaintPropagationRule({0}, {ReturnValueIndex})}}, + {"data", + {"std::__cxx11::basic_string", + TaintPropagationRule({0}, {ReturnValueIndex})}}, + {"size", + {"std::__cxx11::basic_string", + TaintPropagationRule({0}, {ReturnValueIndex})}}, + {"length", + {"std::__cxx11::basic_string", + TaintPropagationRule({0}, {ReturnValueIndex})}}, + {"getline", {"std::", TaintPropagationRule({0}, {1, ReturnValueIndex})}}}; /// Defines a map between the filter function's name, scope and filtering /// args. @@ -351,6 +369,23 @@ /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +/// Treat implicit this parameter as the 0. argument. +const Expr *getArg(const CallExpr *CE, unsigned ArgNum) { + if (const auto *MCE = dyn_cast(CE)) { + if (ArgNum == 0) + return MCE->getImplicitObjectArgument(); + return MCE->getArg(ArgNum - 1); + } + return CE->getArg(ArgNum); +} + +/// Class member functions has an implicit this parameteter. +unsigned getNumArgs(const CallExpr *CE) { + if (isa(CE)) + return CE->getNumArgs() + 1; + return CE->getNumArgs(); +} + GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { ArgVector Result; @@ -516,6 +551,9 @@ void GenericTaintChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { + if (addOverloadedOpPre(CE, C)) + return; + Optional FData = FunctionData::create(CE, C); if (!FData) return; @@ -545,6 +583,31 @@ printTaint(State, Out, NL, Sep); } +bool GenericTaintChecker::addOverloadedOpPre(const CallExpr *CE, + CheckerContext &C) const { + const auto *OCE = dyn_cast(CE); + if (OCE) { + TaintPropagationRule Rule; + switch (OCE->getOperator()) { + case OO_GreaterGreater: + Rule = TaintPropagationRule({0}, {1, ReturnValueIndex}); + break; + case OO_Equal: + Rule = TaintPropagationRule({1}, {0}); + break; + default: + return false; + }; + + ProgramStateRef State = Rule.process(CE, C); + if (State) { + C.addTransition(State); + return true; + } + } + return false; +} + bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, const FunctionData &FData, CheckerContext &C) const { @@ -572,10 +635,10 @@ const auto &Value = It->second; const ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= CE->getNumArgs()) + if (ArgNum >= getNumArgs(CE)) continue; - const Expr *Arg = CE->getArg(ArgNum); + const Expr *Arg = getArg(CE, ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = removeTaint(State, *V); @@ -608,9 +671,9 @@ // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (CE->getNumArgs() < (ArgNum + 1)) + if (getNumArgs(CE) < (ArgNum + 1)) return false; - const Expr *Arg = CE->getArg(ArgNum); + const Expr *Arg = getArg(CE, ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = addTaint(State, *V); @@ -678,18 +741,18 @@ // Check for taint in arguments. bool IsTainted = true; for (unsigned ArgNum : SrcArgs) { - if (ArgNum >= CE->getNumArgs()) + if (ArgNum >= getNumArgs(CE)) continue; - if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) + if ((IsTainted = isTaintedOrPointsToTainted(getArg(CE, ArgNum), State, C))) break; } // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { - if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + for (unsigned i = VariadicIndex; i < getNumArgs(CE); ++i) { + if ((IsTainted = isTaintedOrPointsToTainted(getArg(CE, i), State, C))) break; } } @@ -708,7 +771,7 @@ continue; } - if (ArgNum >= CE->getNumArgs()) + if (ArgNum >= getNumArgs(CE)) continue; // Mark the given argument. @@ -721,8 +784,8 @@ // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { - const Expr *Arg = CE->getArg(i); + for (unsigned i = VariadicIndex; i < getNumArgs(CE); ++i) { + const Expr *Arg = getArg(CE, i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); QualType PType = ArgTy->getPointeeType(); @@ -739,7 +802,7 @@ bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, const CallExpr *CE, CheckerContext &C) { - SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); + SourceLocation DomLoc = getArg(CE, 0)->getExprLoc(); StringRef DomName = C.getMacroNameOrSpelling(DomLoc); // White list the internal communication protocols. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || @@ -749,6 +812,16 @@ return true; } +bool GenericTaintChecker::isStdstream(const Expr *E, CheckerContext &C) { + llvm::Regex R{".*std::basic_istream.*"}; + std::string Error; + if (!R.isValid(Error)) + assert(false); + + std::string CT = E->getType().getCanonicalType().getAsString(); + return R.match(CT); +} + bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { ProgramStateRef State = C.getState(); SVal Val = C.getSVal(E); @@ -795,7 +868,7 @@ return false; for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; - if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) + if ((Format->getType()->getName() == "printf") && getNumArgs(CE) > ArgNum) return true; } @@ -844,7 +917,7 @@ // If either the format string content or the pointer itself are tainted, // warn. - return generateReportIfTainted(CE->getArg(ArgNum), + return generateReportIfTainted(getArg(CE, ArgNum), MsgUncontrolledFormatString, C); } @@ -866,10 +939,10 @@ .Case("dlopen", 0) .Default(InvalidArgIndex); - if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) + if (ArgNum == InvalidArgIndex || getNumArgs(CE) < (ArgNum + 1)) return false; - return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); + return generateReportIfTainted(getArg(CE, ArgNum), MsgSanitizeSystemArgs, C); } // TODO: Should this check be a part of the CString checker? @@ -907,8 +980,8 @@ ArgNum = 2; } - return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && - generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); + return ArgNum != InvalidArgIndex && getNumArgs(CE) > ArgNum && + generateReportIfTainted(getArg(CE, ArgNum), MsgTaintedBufferSize, C); } bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, @@ -921,10 +994,10 @@ const auto &Value = It->second; const GenericTaintChecker::ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= CE->getNumArgs()) + if (ArgNum >= getNumArgs(CE)) continue; - if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) + if (generateReportIfTainted(getArg(CE, ArgNum), MsgCustomSink, C)) return true; } diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -152,6 +152,14 @@ return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); + if (auto LCV = V.getAs()) { + if (Optional binding = + State->getStateManager().getStoreManager().getDefaultBinding( + *LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return isTainted(State, Sym, Kind); + } + } return false; } diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -124,3 +124,126 @@ foo.myMemberScanf("%d", &x); Buffer[x] = 1; // expected-warning {{Out of bound memory access }} } + + +// Test I/O +namespace std { + template class char_traits {}; + + class ios_base {}; + + template> + class basic_ios : public ios_base {}; + + template> + class basic_istream : virtual public basic_ios {}; + template> + class basic_ostream : virtual public basic_ios {}; + + using istream = basic_istream; + using ostream = basic_ostream; + using wistream = basic_istream; + + istream& operator>>(istream& is, int& val); + wistream& operator>>(wistream& is, int& val); + + extern istream cin; + extern istream wcin; + + template> + class basic_fstream : + public basic_istream, public basic_ostream { + public: + basic_fstream(const char*) {} + }; + template> + class basic_ifstream : public basic_istream { + public: + basic_ifstream(const char*) {} + }; + + using ifstream = basic_ifstream; + using fstream = basic_ifstream; + + template class allocator {}; + +namespace __cxx11 { + template< + class CharT, + class Traits = std::char_traits, + class Allocator = std::allocator> + class basic_string { + public: + const char* c_str(); + basic_string operator=(const basic_string&); + private: + unsigned size; + char* ptr; + }; + } + + using string = __cxx11::basic_string; + + istream& operator>>(istream& is, string& val); + istream& getline(istream& is, string& str); +} + +void testCin() { + int x, y; + std::cin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void testWcin() { + int x, y; + std::wcin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void mySink(const std::string&, int, const char*); + +void testFstream() { + std::string str; + std::ifstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testIfstream() { + std::string str; + std::fstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testStdstring() { + std::string str1; + std::cin >> str1; + + std::string& str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} + + const std::string& str3 = str1; + mySink(str3, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testTaintedThis() { + std::string str; + mySink(std::string(), 0, str.c_str()); // no-warning + + std::cin >> str; + mySink(std::string(), 0, str.c_str()); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testOverloadedAssignmentOp() { + std::string str1, str2; + std::cin >> str1; + str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testGetline() { + std::string str; + std::getline(std::cin, str); + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +}