Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -23,15 +23,20 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include +#include +#include using namespace clang; using namespace ento; namespace { -class GenericTaintChecker : public Checker< check::PostStmt, - check::PreStmt > { +class GenericTaintChecker + : public Checker, check::PreStmt> { public: - static void *getTag() { static int Tag; return &Tag; } + static void *getTag() { + static int Tag; + return &Tag; + } void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; @@ -43,7 +48,7 @@ static const unsigned ReturnValueIndex = UINT_MAX - 1; mutable std::unique_ptr BT; - inline void initBugType() const { + void initBugType() const { if (!BT) BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); } @@ -69,8 +74,8 @@ static Optional getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Functions defining the attack surface. - typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, - CheckerContext &C) const; + using FnCheck = ProgramStateRef (GenericTaintChecker::*)( + const CallExpr *, CheckerContext &C) const; ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; @@ -100,7 +105,7 @@ bool generateReportIfTainted(const Expr *E, const char Msg[], CheckerContext &C) const; - typedef SmallVector ArgVector; + using ArgVector = SmallVector; /// A struct used to specify taint propagation rules for a function. /// @@ -112,50 +117,48 @@ /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { + enum class VariadicType { None, Src, Dst }; + /// List of arguments which can be taint sources and should be checked. ArgVector SrcArgs; /// List of arguments which should be tainted on function return. ArgVector DstArgs; - // TODO: Check if using other data structures would be more optimal. - - TaintPropagationRule() {} - - TaintPropagationRule(unsigned SArg, - unsigned DArg, bool TaintRet = false) { - SrcArgs.push_back(SArg); - DstArgs.push_back(DArg); - if (TaintRet) - DstArgs.push_back(ReturnValueIndex); - } - - TaintPropagationRule(unsigned SArg1, unsigned SArg2, - unsigned DArg, bool TaintRet = false) { - SrcArgs.push_back(SArg1); - SrcArgs.push_back(SArg2); - DstArgs.push_back(DArg); - if (TaintRet) - DstArgs.push_back(ReturnValueIndex); - } + /// Index for the first variadic parameter if exist. + unsigned VariadicIndex; + /// Show when a function has variadic parameters. If it has, it marks all + /// of them as source or destination. + VariadicType VarType; + + TaintPropagationRule() + : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {} + + TaintPropagationRule(std::initializer_list &&Src, + std::initializer_list &&Dst, + VariadicType Var = VariadicType::None, + unsigned VarIndex = InvalidArgIndex) + : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), + VariadicIndex(VarIndex), VarType(Var) {} /// Get the propagation rule for a given function. static TaintPropagationRule - getTaintPropagationRule(const FunctionDecl *FDecl, - StringRef Name, - CheckerContext &C); + getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C); - inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } - inline void addDstArg(unsigned A) { DstArgs.push_back(A); } + void addSrcArg(unsigned A) { SrcArgs.push_back(A); } + void addDstArg(unsigned A) { DstArgs.push_back(A); } - inline bool isNull() const { return SrcArgs.empty(); } + bool isNull() const { + return SrcArgs.empty() && DstArgs.empty() && + VariadicType::None == VarType; + } - inline bool isDestinationArgument(unsigned ArgNum) const { - return (std::find(DstArgs.begin(), - DstArgs.end(), ArgNum) != DstArgs.end()); + bool isDestinationArgument(unsigned ArgNum) const { + return (std::find(DstArgs.begin(), DstArgs.end(), ArgNum) != + DstArgs.end()); } - static inline bool isTaintedOrPointsToTainted(const Expr *E, - ProgramStateRef State, - CheckerContext &C) { + static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, + CheckerContext &C) { if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) return true; @@ -169,7 +172,6 @@ /// Pre-process a function which propagates taint according to the /// taint rule. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; - }; }; @@ -177,17 +179,18 @@ const unsigned GenericTaintChecker::InvalidArgIndex; const char GenericTaintChecker::MsgUncontrolledFormatString[] = - "Untrusted data is used as a format string " - "(CWE-134: Uncontrolled Format String)"; + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; const char GenericTaintChecker::MsgSanitizeSystemArgs[] = - "Untrusted data is passed to a system call " - "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; const char GenericTaintChecker::MsgTaintedBufferSize[] = - "Untrusted data is used to specify the buffer size " - "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " - "character data and the null terminator)"; + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " + "for " + "character data and the null terminator)"; } // end of anonymous namespace @@ -199,33 +202,33 @@ GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( - const FunctionDecl *FDecl, - StringRef Name, - CheckerContext &C) { + const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { // TODO: Currently, we might lose precision here: we always mark a return // value as tainted even if it's just a pointer, pointing to tainted data. // Check for exact name match for functions without builtin substitutes. - TaintPropagationRule Rule = llvm::StringSwitch(Name) - .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) - .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) - .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("read", TaintPropagationRule(0, 2, 1, true)) - .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) - .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) - .Case("fgets", TaintPropagationRule(2, 0, true)) - .Case("getline", TaintPropagationRule(2, 0)) - .Case("getdelim", TaintPropagationRule(3, 0)) - .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) - .Default(TaintPropagationRule()); + TaintPropagationRule Rule = + llvm::StringSwitch(Name) + .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) + .Case("pread", + TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) + .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) + .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) + .Case("getline", TaintPropagationRule({2}, {0})) + .Case("getdelim", TaintPropagationRule({3}, {0})) + .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) + .Default(TaintPropagationRule()); if (!Rule.isNull()) return Rule; @@ -233,18 +236,18 @@ // Check if it's one of the memory setting/copying functions. // This check is specialized but faster then calling isCLibraryFunction. unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: case Builtin::BIstrncat: - return TaintPropagationRule(1, 2, 0, true); + return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); case Builtin::BIstrlcpy: case Builtin::BIstrlcat: - return TaintPropagationRule(1, 2, 0, false); + return TaintPropagationRule({1, 2}, {0}); case Builtin::BIstrndup: - return TaintPropagationRule(0, 1, ReturnValueIndex); + return TaintPropagationRule({0, 1}, {ReturnValueIndex}); default: break; @@ -252,20 +255,23 @@ // Process all other functions which could be defined as builtins. if (Rule.isNull()) { - if (C.isCLibraryFunction(FDecl, "snprintf") || - C.isCLibraryFunction(FDecl, "sprintf")) - return TaintPropagationRule(InvalidArgIndex, 0, true); + if (C.isCLibraryFunction(FDecl, "snprintf")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, + 3); + else if (C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, + 2); else if (C.isCLibraryFunction(FDecl, "strcpy") || C.isCLibraryFunction(FDecl, "stpcpy") || C.isCLibraryFunction(FDecl, "strcat")) - return TaintPropagationRule(1, 0, true); + return TaintPropagationRule({1}, {0, ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "bcopy")) - return TaintPropagationRule(0, 2, 1, false); + return TaintPropagationRule({0, 2}, {1}); else if (C.isCLibraryFunction(FDecl, "strdup") || C.isCLibraryFunction(FDecl, "strdupa")) - return TaintPropagationRule(0, ReturnValueIndex); + return TaintPropagationRule({0}, {ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "wcsdup")) - return TaintPropagationRule(0, ReturnValueIndex); + return TaintPropagationRule({0}, {ReturnValueIndex}); } // Skipping the following functions, since they might be used for cleansing @@ -305,7 +311,7 @@ // First, try generating a propagation rule for this function. TaintPropagationRule Rule = - TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); + TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); if (!Rule.isNull()) { State = Rule.process(CE, C); if (!State) @@ -316,15 +322,14 @@ // Otherwise, check if we have custom pre-processing implemented. FnCheck evalFunction = llvm::StringSwitch(Name) - .Case("fscanf", &GenericTaintChecker::preFscanf) - .Default(nullptr); + .Case("fscanf", &GenericTaintChecker::preFscanf) + .Default(nullptr); // Check and evaluate the call. if (evalFunction) State = (this->*evalFunction)(CE, C); if (!State) return; C.addTransition(State); - } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -338,9 +343,8 @@ if (TaintArgs.isEmpty()) return false; - for (llvm::ImmutableSet::iterator - I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { - unsigned ArgNum = *I; + for (auto I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { @@ -352,7 +356,7 @@ // tainted after the call. if (CE->getNumArgs() < (ArgNum + 1)) return false; - const Expr* Arg = CE->getArg(ArgNum); + const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = State->addTaint(*V); @@ -379,19 +383,20 @@ StringRef Name = C.getCalleeName(FDecl); if (Name.empty()) return; - FnCheck evalFunction = llvm::StringSwitch(Name) - .Case("scanf", &GenericTaintChecker::postScanf) - // TODO: Add support for vfscanf & family. - .Case("getchar", &GenericTaintChecker::postRetTaint) - .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) - .Case("getenv", &GenericTaintChecker::postRetTaint) - .Case("fopen", &GenericTaintChecker::postRetTaint) - .Case("fdopen", &GenericTaintChecker::postRetTaint) - .Case("freopen", &GenericTaintChecker::postRetTaint) - .Case("getch", &GenericTaintChecker::postRetTaint) - .Case("wgetch", &GenericTaintChecker::postRetTaint) - .Case("socket", &GenericTaintChecker::postSocket) - .Default(nullptr); + FnCheck evalFunction = + llvm::StringSwitch(Name) + .Case("scanf", &GenericTaintChecker::postScanf) + // TODO: Add support for vfscanf & family. + .Case("getchar", &GenericTaintChecker::postRetTaint) + .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) + .Case("getenv", &GenericTaintChecker::postRetTaint) + .Case("fopen", &GenericTaintChecker::postRetTaint) + .Case("fdopen", &GenericTaintChecker::postRetTaint) + .Case("freopen", &GenericTaintChecker::postRetTaint) + .Case("getch", &GenericTaintChecker::postRetTaint) + .Case("wgetch", &GenericTaintChecker::postRetTaint) + .Case("socket", &GenericTaintChecker::postSocket) + .Default(nullptr); // If the callee isn't defined, it is not of security concern. // Check and evaluate the call. @@ -404,7 +409,8 @@ C.addTransition(State); } -bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ +bool GenericTaintChecker::checkPre(const CallExpr *CE, + CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; @@ -458,53 +464,29 @@ // Check for taint in arguments. bool IsTainted = false; - for (ArgVector::const_iterator I = SrcArgs.begin(), - E = SrcArgs.end(); I != E; ++I) { + for (auto I = SrcArgs.begin(), E = SrcArgs.end(); I != E; ++I) { unsigned ArgNum = *I; - - if (ArgNum == InvalidArgIndex) { - // Check if any of the arguments is tainted, but skip the - // destination arguments. - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { - if (isDestinationArgument(i)) - continue; - if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) - break; - } - break; - } - - if (CE->getNumArgs() < (ArgNum + 1)) + if (ArgNum >= CE->getNumArgs()) return State; if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) break; } + + // Check for taint in variadic arguments. + if (!IsTainted && VariadicType::Src == VarType) { + // Check if any of the arguments is tainted + for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + break; + } + } + if (!IsTainted) return State; // Mark the arguments which should be tainted after the function returns. - for (ArgVector::const_iterator I = DstArgs.begin(), - E = DstArgs.end(); I != E; ++I) { + for (auto I = DstArgs.begin(), E = DstArgs.end(); I != E; ++I) { unsigned ArgNum = *I; - - // Should we mark all arguments as tainted? - if (ArgNum == InvalidArgIndex) { - // For all pointer and references that were passed in: - // If they are not pointing to const data, mark data as tainted. - // TODO: So far we are just going one level down; ideally we'd need to - // recurse here. - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { - const Expr *Arg = CE->getArg(i); - // Process pointer argument. - const Type *ArgTy = Arg->getType().getTypePtr(); - QualType PType = ArgTy->getPointeeType(); - if ((!PType.isNull() && !PType.isConstQualified()) - || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) - State = State->add(i); - } - continue; - } - // Should mark the return value? if (ArgNum == ReturnValueIndex) { State = State->add(ReturnValueIndex); @@ -516,14 +498,30 @@ State = State->add(ArgNum); } + // Mark all variadic arguments tainted if present. + if (VariadicType::Dst == VarType) { + // For all pointer and references that were passed in: + // If they are not pointing to const data, mark data as tainted. + // TODO: So far we are just going one level down; ideally we'd need to + // recurse here. + for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + const Expr *Arg = CE->getArg(i); + // Process pointer argument. + const Type *ArgTy = Arg->getType().getTypePtr(); + QualType PType = ArgTy->getPointeeType(); + if ((!PType.isNull() && !PType.isConstQualified()) || + (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + State = State->add(i); + } + } + return State; } - // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 // and arg 1 should get taint. ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, - CheckerContext &C) const { + CheckerContext &C) const { assert(CE->getNumArgs() >= 2); ProgramStateRef State = C.getState(); @@ -532,14 +530,13 @@ isStdin(CE->getArg(0), C)) { // All arguments except for the first two should get taint. for (unsigned int i = 2; i < CE->getNumArgs(); ++i) - State = State->add(i); + State = State->add(i); return State; } return nullptr; } - // If argument 0(protocol domain) is network, the return value should get taint. ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, CheckerContext &C) const { @@ -558,7 +555,7 @@ } ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, - CheckerContext &C) const { + CheckerContext &C) const { ProgramStateRef State = C.getState(); if (CE->getNumArgs() < 2) return State; @@ -567,7 +564,7 @@ for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - const Expr* Arg = CE->getArg(i); + const Expr *Arg = CE->getArg(i); Optional V = getPointedToSVal(C, Arg); if (V) State = State->addTaint(*V); @@ -593,7 +590,8 @@ return false; // Get it's symbol and find the declaration region it's pointing to. - const SymbolRegionValue *Sm =dyn_cast(SymReg->getSymbol()); + const SymbolRegionValue *Sm = + dyn_cast(SymReg->getSymbol()); if (!Sm) return false; const DeclRegion *DeclReg = dyn_cast_or_null(Sm->getRegion()); @@ -602,14 +600,14 @@ // This region corresponds to a declaration, find out if it's a global/extern // variable named stdin with the proper type. - if (const VarDecl *D = dyn_cast_or_null(DeclReg->getDecl())) { + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { D = D->getCanonicalDecl(); - if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) - if (const PointerType * PtrTy = - dyn_cast(D->getType().getTypePtr())) - if (PtrTy->getPointeeType().getCanonicalType() == - C.getASTContext().getFILEType().getCanonicalType()) - return true; + if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { + const auto *PtrTy = dyn_cast(D->getType().getTypePtr()); + if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) + return true; + } } return false; } @@ -625,8 +623,7 @@ return false; for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; - if ((Format->getType()->getName() == "printf") && - CE->getNumArgs() > ArgNum) + if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) return true; } @@ -667,36 +664,36 @@ return false; } -bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, - CheckerContext &C) const{ +bool GenericTaintChecker::checkUncontrolledFormatString( + const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. unsigned int ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; - // If either the format string content or the pointer itself are tainted, warn. + // If either the format string content or the pointer itself are tainted, + // warn. return generateReportIfTainted(CE->getArg(ArgNum), MsgUncontrolledFormatString, C); } -bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, - StringRef Name, +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const { // TODO: It might make sense to run this check on demand. In some cases, // we should check if the environment has been cleansed here. We also might // need to know if the user was reset before these calls(seteuid). unsigned ArgNum = llvm::StringSwitch(Name) - .Case("system", 0) - .Case("popen", 0) - .Case("execl", 0) - .Case("execle", 0) - .Case("execlp", 0) - .Case("execv", 0) - .Case("execvp", 0) - .Case("execvP", 0) - .Case("execve", 0) - .Case("dlopen", 0) - .Default(UINT_MAX); + .Case("system", 0) + .Case("popen", 0) + .Case("execl", 0) + .Case("execle", 0) + .Case("execlp", 0) + .Case("execv", 0) + .Case("execvp", 0) + .Case("execvP", 0) + .Case("execve", 0) + .Case("dlopen", 0) + .Default(UINT_MAX); if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) return false; @@ -712,8 +709,8 @@ // If the function has a buffer size argument, set ArgNum. unsigned ArgNum = InvalidArgIndex; unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: