Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -88,6 +88,7 @@ void parseConfiguration(CheckerManager &Mgr, const std::string &Option, TaintConfiguration &&Config); + static const unsigned ImplicitArgIndex{0}; static const unsigned InvalidArgIndex{std::numeric_limits::max()}; /// Denotes the return vale. static const unsigned ReturnValueIndex{std::numeric_limits::max() - @@ -138,6 +139,9 @@ bool checkPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const; + /// Add taint sources for extraction operator on pre-visit. + static bool addOverloadedOpPre(const CallEvent &Call, CheckerContext &C); + /// Add taint sources on a pre-visit. Returns true on matching. bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const; @@ -154,6 +158,9 @@ /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// Check if the type of the variable is std::basic_istream + static bool isStdstream(const Expr *E, CheckerContext &C); + /// Given a pointer argument, return the value it points to. static Optional getPointeeOf(CheckerContext &C, const Expr *Arg); @@ -258,15 +265,12 @@ return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); } - static bool isTaintedOrPointsToTainted(const Expr *E, - const ProgramStateRef &State, + static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C) || + isStdstream(E, C)) return true; - if (!E->getType().getTypePtr()->isPointerType()) - return false; - Optional V = getPointeeOf(C, E); return (V && isTainted(State, *V)); } @@ -282,7 +286,14 @@ /// Defines a map between the propagation function's name, scope /// and TaintPropagationRule. - NameRuleMap CustomPropagations; + NameRuleMap CustomPropagations{ + {"c_str", + {"std::basic_string", {{ImplicitArgIndex}, {ReturnValueIndex}}}}, + {"data", {"std::basic_string", {{ImplicitArgIndex}, {ReturnValueIndex}}}}, + {"size", {"std::basic_string", {{ImplicitArgIndex}, {ReturnValueIndex}}}}, + {"length", + {"std::basic_string", {{ImplicitArgIndex}, {ReturnValueIndex}}}}, + {"getline", {"std::", {{0}, {1, ReturnValueIndex}}}}}; /// Defines a map between the filter function's name, scope and filtering /// args. @@ -354,6 +365,19 @@ /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +/// Treat implicit this parameter as the 0. argument. +const Expr *getArgWithImplicitThis(const CallEvent &Call, unsigned ArgNum) { + if (const auto *InstanceCall = dyn_cast(&Call)) + return ArgNum == 0 ? InstanceCall->getCXXThisExpr() + : InstanceCall->getArgExpr(ArgNum - 1); + return Call.getArgExpr(ArgNum); +} + +/// Class member functions has an implicit this parameter. +unsigned getNumArgsWithImplicitThis(const CallEvent &Call) { + return Call.getNumArgs() + static_cast(isa(Call)); +} + GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, const std::string &Option, @@ -521,6 +545,9 @@ void GenericTaintChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + if (addOverloadedOpPre(Call, C)) + return; + Optional FData = FunctionData::create(Call, C); if (!FData) return; @@ -550,6 +577,31 @@ printTaint(State, Out, NL, Sep); } +bool GenericTaintChecker::addOverloadedOpPre(const CallEvent &Call, + CheckerContext &C) { + if (const auto *OCE = + dyn_cast_or_null(Call.getOriginExpr())) { + TaintPropagationRule Rule; + switch (OCE->getOperator()) { + case OO_GreaterGreater: + Rule = {{ImplicitArgIndex}, {1, ReturnValueIndex}}; + break; + case OO_Equal: + Rule = {{1}, {ImplicitArgIndex}}; + break; + default: + return false; + } + + ProgramStateRef State = Rule.process(Call, C); + if (State) { + C.addTransition(State); + return true; + } + } + return false; +} + bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, const FunctionData &FData, CheckerContext &C) const { @@ -577,10 +629,10 @@ const auto &Value = It->second; const ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - const Expr *Arg = Call.getArgExpr(ArgNum); + const Expr *Arg = getArgWithImplicitThis(Call, ArgNum); Optional V = getPointeeOf(C, Arg); if (V) State = removeTaint(State, *V); @@ -613,9 +665,9 @@ // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (Call.getNumArgs() < (ArgNum + 1)) + if (getNumArgsWithImplicitThis(Call) < (ArgNum + 1)) return false; - const Expr *Arg = Call.getArgExpr(ArgNum); + const Expr *Arg = getArgWithImplicitThis(Call, ArgNum); Optional V = getPointeeOf(C, Arg); if (V) State = addTaint(State, *V); @@ -679,20 +731,21 @@ // Check for taint in arguments. bool IsTainted = true; for (unsigned ArgNum : SrcArgs) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - if ((IsTainted = - isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) + if ((IsTainted = isTaintedOrPointsToTainted( + getArgWithImplicitThis(Call, ArgNum), State, C))) break; } // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { - if ((IsTainted = - isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) + for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call); + ++i) { + if ((IsTainted = isTaintedOrPointsToTainted( + getArgWithImplicitThis(Call, i), State, C))) break; } } @@ -711,7 +764,7 @@ continue; } - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; // Mark the given argument. @@ -724,8 +777,9 @@ // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { - const Expr *Arg = Call.getArgExpr(i); + for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call); + ++i) { + const Expr *Arg = getArgWithImplicitThis(Call, i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); QualType PType = ArgTy->getPointeeType(); @@ -742,7 +796,7 @@ // If argument 0(protocol domain) is network, the return value should get taint. bool GenericTaintChecker::TaintPropagationRule::postSocket( bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { - SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); + SourceLocation DomLoc = getArgWithImplicitThis(Call, 0)->getExprLoc(); StringRef DomName = C.getMacroNameOrSpelling(DomLoc); // White list the internal communication protocols. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || @@ -751,6 +805,11 @@ return true; } +bool GenericTaintChecker::isStdstream(const Expr *E, CheckerContext &C) { + const std::string CT = E->getType().getCanonicalType().getAsString(); + return StringRef(CT).find("std::basic_istream") != StringRef::npos; +} + bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { ProgramStateRef State = C.getState(); SVal Val = C.getSVal(E); @@ -797,7 +856,7 @@ for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; if ((Format->getType()->getName() == "printf") && - Call.getNumArgs() > ArgNum) + getNumArgsWithImplicitThis(Call) > ArgNum) return true; } @@ -846,7 +905,7 @@ // If either the format string content or the pointer itself are tainted, // warn. - return generateReportIfTainted(Call.getArgExpr(ArgNum), + return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), MsgUncontrolledFormatString, C); } @@ -868,11 +927,12 @@ .Case("dlopen", 0) .Default(InvalidArgIndex); - if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) + if (ArgNum == InvalidArgIndex || + getNumArgsWithImplicitThis(Call) < (ArgNum + 1)) return false; - return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, - C); + return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgSanitizeSystemArgs, C); } // TODO: Should this check be a part of the CString checker? @@ -912,9 +972,10 @@ ArgNum = 2; } - return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && - generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, - C); + return ArgNum != InvalidArgIndex && + getNumArgsWithImplicitThis(Call) > ArgNum && + generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgTaintedBufferSize, C); } bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, @@ -927,10 +988,11 @@ const auto &Value = It->second; const GenericTaintChecker::ArgVector &Args = Value.second; for (unsigned ArgNum : Args) { - if (ArgNum >= Call.getNumArgs()) + if (ArgNum >= getNumArgsWithImplicitThis(Call)) continue; - if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) + if (generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum), + MsgCustomSink, C)) return true; } Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -152,6 +152,14 @@ return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); + if (auto LCV = V.getAs()) { + if (Optional binding = + State->getStateManager().getStoreManager().getDefaultBinding( + *LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return isTainted(State, Sym, Kind); + } + } return false; } Index: clang/test/Analysis/Inputs/system-header-simulator-cxx.h =================================================================== --- clang/test/Analysis/Inputs/system-header-simulator-cxx.h +++ clang/test/Analysis/Inputs/system-header-simulator-cxx.h @@ -579,6 +579,9 @@ void resize(size_type count); void shrink_to_fit(); void swap(basic_string &other); + private: + unsigned size; + char *ptr; }; typedef basic_string string; @@ -588,6 +591,46 @@ typedef basic_string u32string; #endif + template + class char_traits {}; + + class ios_base {}; + + template > + class basic_ios : public ios_base {}; + + template > + class basic_istream : virtual public basic_ios {}; + template > + class basic_ostream : virtual public basic_ios {}; + + using istream = basic_istream; + using ostream = basic_ostream; + using wistream = basic_istream; + + istream &operator>>(istream &is, int &val); + wistream &operator>>(wistream &is, int &val); + + extern istream cin; + extern istream wcin; + + template > + class basic_fstream : public basic_istream, public basic_ostream { + public: + basic_fstream(const char *) {} + }; + template > + class basic_ifstream : public basic_istream { + public: + basic_ifstream(const char *) {} + }; + + using ifstream = basic_ifstream; + using fstream = basic_ifstream; + + istream &operator>>(istream &is, string &val); + istream &getline(istream &is, string &str); + class exception { public: exception() throw(); Index: clang/test/Analysis/diagnostics/explicit-suppression.cpp =================================================================== --- clang/test/Analysis/diagnostics/explicit-suppression.cpp +++ clang/test/Analysis/diagnostics/explicit-suppression.cpp @@ -19,6 +19,6 @@ void testCopyNull(C *I, C *E) { std::copy(I, E, (C *)0); #ifndef SUPPRESSED - // expected-warning@../Inputs/system-header-simulator-cxx.h:699 {{Called C++ object pointer is null}} + // expected-warning@../Inputs/system-header-simulator-cxx.h:742 {{Called C++ object pointer is null}} #endif } Index: clang/test/Analysis/taint-generic.cpp =================================================================== --- clang/test/Analysis/taint-generic.cpp +++ clang/test/Analysis/taint-generic.cpp @@ -1,5 +1,7 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s +#include "Inputs/system-header-simulator-cxx.h" + #define BUFSIZE 10 int Buffer[BUFSIZE]; @@ -124,3 +126,64 @@ foo.myMemberScanf("%d", &x); Buffer[x] = 1; // expected-warning {{Out of bound memory access }} } + +// Test I/O +void testCin() { + int x, y; + std::cin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void testWcin() { + int x, y; + std::wcin >> x >> y; + Buffer[y] = 1; // expected-warning {{Out of bound memory access }} +} + +void mySink(const std::string &, int, const char *); + +void testFstream() { + std::string str; + std::ifstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testIfstream() { + std::string str; + std::fstream file("example.txt"); + file >> str; + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testStdstring() { + std::string str1; + std::cin >> str1; + + std::string &str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} + + const std::string &str3 = str1; + mySink(str3, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testTaintedThis() { + std::string str; + mySink(std::string(), 0, str.c_str()); // no-warning + + std::cin >> str; + mySink(std::string(), 0, str.c_str()); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testOverloadedAssignmentOp() { + std::string str1, str2; + std::cin >> str1; + str2 = str1; + mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +} + +void testGetline() { + std::string str; + std::getline(std::cin, str); + mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}} +}