Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -28,10 +28,13 @@ using namespace ento; namespace { -class GenericTaintChecker : public Checker< check::PostStmt, - check::PreStmt > { +class GenericTaintChecker + : public Checker, check::PreStmt> { public: - static void *getTag() { static int Tag; return &Tag; } + static void *getTag() { + static int Tag; + return &Tag; + } void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; @@ -69,8 +72,8 @@ static Optional getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Functions defining the attack surface. - typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, - CheckerContext &C) const; + typedef ProgramStateRef (GenericTaintChecker::*FnCheck)( + const CallExpr *, CheckerContext &C) const; ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; @@ -120,16 +123,15 @@ TaintPropagationRule() {} - TaintPropagationRule(unsigned SArg, - unsigned DArg, bool TaintRet = false) { + TaintPropagationRule(unsigned SArg, unsigned DArg, bool TaintRet = false) { SrcArgs.push_back(SArg); DstArgs.push_back(DArg); if (TaintRet) DstArgs.push_back(ReturnValueIndex); } - TaintPropagationRule(unsigned SArg1, unsigned SArg2, - unsigned DArg, bool TaintRet = false) { + TaintPropagationRule(unsigned SArg1, unsigned SArg2, unsigned DArg, + bool TaintRet = false) { SrcArgs.push_back(SArg1); SrcArgs.push_back(SArg2); DstArgs.push_back(DArg); @@ -139,18 +141,17 @@ /// Get the propagation rule for a given function. static TaintPropagationRule - getTaintPropagationRule(const FunctionDecl *FDecl, - StringRef Name, - CheckerContext &C); + getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C); inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } - inline void addDstArg(unsigned A) { DstArgs.push_back(A); } + inline void addDstArg(unsigned A) { DstArgs.push_back(A); } inline bool isNull() const { return SrcArgs.empty(); } inline bool isDestinationArgument(unsigned ArgNum) const { - return (std::find(DstArgs.begin(), - DstArgs.end(), ArgNum) != DstArgs.end()); + return (std::find(DstArgs.begin(), DstArgs.end(), ArgNum) != + DstArgs.end()); } static inline bool isTaintedOrPointsToTainted(const Expr *E, @@ -169,7 +170,6 @@ /// Pre-process a function which propagates taint according to the /// taint rule. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; - }; }; @@ -177,17 +177,18 @@ const unsigned GenericTaintChecker::InvalidArgIndex; const char GenericTaintChecker::MsgUncontrolledFormatString[] = - "Untrusted data is used as a format string " - "(CWE-134: Uncontrolled Format String)"; + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; const char GenericTaintChecker::MsgSanitizeSystemArgs[] = - "Untrusted data is passed to a system call " - "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; const char GenericTaintChecker::MsgTaintedBufferSize[] = - "Untrusted data is used to specify the buffer size " - "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " - "character data and the null terminator)"; + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " + "for " + "character data and the null terminator)"; } // end of anonymous namespace @@ -199,33 +200,32 @@ GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( - const FunctionDecl *FDecl, - StringRef Name, - CheckerContext &C) { + const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { // TODO: Currently, we might lose precision here: we always mark a return // value as tainted even if it's just a pointer, pointing to tainted data. // Check for exact name match for functions without builtin substitutes. - TaintPropagationRule Rule = llvm::StringSwitch(Name) - .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) - .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) - .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("read", TaintPropagationRule(0, 2, 1, true)) - .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) - .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) - .Case("fgets", TaintPropagationRule(2, 0, true)) - .Case("getline", TaintPropagationRule(2, 0)) - .Case("getdelim", TaintPropagationRule(3, 0)) - .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) - .Default(TaintPropagationRule()); + TaintPropagationRule Rule = + llvm::StringSwitch(Name) + .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) + .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) + .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) + .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) + .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) + .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) + .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) + .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) + .Case("read", TaintPropagationRule(0, 2, 1, true)) + .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) + .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) + .Case("fgets", TaintPropagationRule(2, 0, true)) + .Case("getline", TaintPropagationRule(2, 0)) + .Case("getdelim", TaintPropagationRule(3, 0)) + .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) + .Default(TaintPropagationRule()); if (!Rule.isNull()) return Rule; @@ -233,8 +233,8 @@ // Check if it's one of the memory setting/copying functions. // This check is specialized but faster then calling isCLibraryFunction. unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: @@ -305,7 +305,7 @@ // First, try generating a propagation rule for this function. TaintPropagationRule Rule = - TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); + TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); if (!Rule.isNull()) { State = Rule.process(CE, C); if (!State) @@ -316,15 +316,14 @@ // Otherwise, check if we have custom pre-processing implemented. FnCheck evalFunction = llvm::StringSwitch(Name) - .Case("fscanf", &GenericTaintChecker::preFscanf) - .Default(nullptr); + .Case("fscanf", &GenericTaintChecker::preFscanf) + .Default(nullptr); // Check and evaluate the call. if (evalFunction) State = (this->*evalFunction)(CE, C); if (!State) return; C.addTransition(State); - } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -338,9 +337,10 @@ if (TaintArgs.isEmpty()) return false; - for (llvm::ImmutableSet::iterator - I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { - unsigned ArgNum = *I; + for (llvm::ImmutableSet::iterator I = TaintArgs.begin(), + E = TaintArgs.end(); + I != E; ++I) { + unsigned ArgNum = *I; // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { @@ -352,7 +352,7 @@ // tainted after the call. if (CE->getNumArgs() < (ArgNum + 1)) return false; - const Expr* Arg = CE->getArg(ArgNum); + const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) State = State->addTaint(*V); @@ -379,19 +379,20 @@ StringRef Name = C.getCalleeName(FDecl); if (Name.empty()) return; - FnCheck evalFunction = llvm::StringSwitch(Name) - .Case("scanf", &GenericTaintChecker::postScanf) - // TODO: Add support for vfscanf & family. - .Case("getchar", &GenericTaintChecker::postRetTaint) - .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) - .Case("getenv", &GenericTaintChecker::postRetTaint) - .Case("fopen", &GenericTaintChecker::postRetTaint) - .Case("fdopen", &GenericTaintChecker::postRetTaint) - .Case("freopen", &GenericTaintChecker::postRetTaint) - .Case("getch", &GenericTaintChecker::postRetTaint) - .Case("wgetch", &GenericTaintChecker::postRetTaint) - .Case("socket", &GenericTaintChecker::postSocket) - .Default(nullptr); + FnCheck evalFunction = + llvm::StringSwitch(Name) + .Case("scanf", &GenericTaintChecker::postScanf) + // TODO: Add support for vfscanf & family. + .Case("getchar", &GenericTaintChecker::postRetTaint) + .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) + .Case("getenv", &GenericTaintChecker::postRetTaint) + .Case("fopen", &GenericTaintChecker::postRetTaint) + .Case("fdopen", &GenericTaintChecker::postRetTaint) + .Case("freopen", &GenericTaintChecker::postRetTaint) + .Case("getch", &GenericTaintChecker::postRetTaint) + .Case("wgetch", &GenericTaintChecker::postRetTaint) + .Case("socket", &GenericTaintChecker::postSocket) + .Default(nullptr); // If the callee isn't defined, it is not of security concern. // Check and evaluate the call. @@ -404,7 +405,8 @@ C.addTransition(State); } -bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ +bool GenericTaintChecker::checkPre(const CallExpr *CE, + CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; @@ -458,8 +460,8 @@ // Check for taint in arguments. bool IsTainted = false; - for (ArgVector::const_iterator I = SrcArgs.begin(), - E = SrcArgs.end(); I != E; ++I) { + for (ArgVector::const_iterator I = SrcArgs.begin(), E = SrcArgs.end(); I != E; + ++I) { unsigned ArgNum = *I; if (ArgNum == InvalidArgIndex) { @@ -483,8 +485,8 @@ return State; // Mark the arguments which should be tainted after the function returns. - for (ArgVector::const_iterator I = DstArgs.begin(), - E = DstArgs.end(); I != E; ++I) { + for (ArgVector::const_iterator I = DstArgs.begin(), E = DstArgs.end(); I != E; + ++I) { unsigned ArgNum = *I; // Should we mark all arguments as tainted? @@ -498,8 +500,8 @@ // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); QualType PType = ArgTy->getPointeeType(); - if ((!PType.isNull() && !PType.isConstQualified()) - || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + if ((!PType.isNull() && !PType.isConstQualified()) || + (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) State = State->add(i); } continue; @@ -519,11 +521,10 @@ return State; } - // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 // and arg 1 should get taint. ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, - CheckerContext &C) const { + CheckerContext &C) const { assert(CE->getNumArgs() >= 2); ProgramStateRef State = C.getState(); @@ -532,14 +533,13 @@ isStdin(CE->getArg(0), C)) { // All arguments except for the first two should get taint. for (unsigned int i = 2; i < CE->getNumArgs(); ++i) - State = State->add(i); + State = State->add(i); return State; } return nullptr; } - // If argument 0(protocol domain) is network, the return value should get taint. ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, CheckerContext &C) const { @@ -558,7 +558,7 @@ } ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, - CheckerContext &C) const { + CheckerContext &C) const { ProgramStateRef State = C.getState(); if (CE->getNumArgs() < 2) return State; @@ -567,7 +567,7 @@ for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - const Expr* Arg = CE->getArg(i); + const Expr *Arg = CE->getArg(i); Optional V = getPointedToSVal(C, Arg); if (V) State = State->addTaint(*V); @@ -593,7 +593,8 @@ return false; // Get it's symbol and find the declaration region it's pointing to. - const SymbolRegionValue *Sm =dyn_cast(SymReg->getSymbol()); + const SymbolRegionValue *Sm = + dyn_cast(SymReg->getSymbol()); if (!Sm) return false; const DeclRegion *DeclReg = dyn_cast_or_null(Sm->getRegion()); @@ -605,11 +606,11 @@ if (const VarDecl *D = dyn_cast_or_null(DeclReg->getDecl())) { D = D->getCanonicalDecl(); if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) - if (const PointerType * PtrTy = + if (const PointerType *PtrTy = dyn_cast(D->getType().getTypePtr())) - if (PtrTy->getPointeeType().getCanonicalType() == - C.getASTContext().getFILEType().getCanonicalType()) - return true; + if (PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) + return true; } return false; } @@ -625,8 +626,7 @@ return false; for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; - if ((Format->getType()->getName() == "printf") && - CE->getNumArgs() > ArgNum) + if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) return true; } @@ -667,36 +667,36 @@ return false; } -bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, - CheckerContext &C) const{ +bool GenericTaintChecker::checkUncontrolledFormatString( + const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. unsigned int ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; - // If either the format string content or the pointer itself are tainted, warn. + // If either the format string content or the pointer itself are tainted, + // warn. return generateReportIfTainted(CE->getArg(ArgNum), MsgUncontrolledFormatString, C); } -bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, - StringRef Name, +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const { // TODO: It might make sense to run this check on demand. In some cases, // we should check if the environment has been cleansed here. We also might // need to know if the user was reset before these calls(seteuid). unsigned ArgNum = llvm::StringSwitch(Name) - .Case("system", 0) - .Case("popen", 0) - .Case("execl", 0) - .Case("execle", 0) - .Case("execlp", 0) - .Case("execv", 0) - .Case("execvp", 0) - .Case("execvP", 0) - .Case("execve", 0) - .Case("dlopen", 0) - .Default(UINT_MAX); + .Case("system", 0) + .Case("popen", 0) + .Case("execl", 0) + .Case("execle", 0) + .Case("execlp", 0) + .Case("execv", 0) + .Case("execvp", 0) + .Case("execvP", 0) + .Case("execve", 0) + .Case("dlopen", 0) + .Default(UINT_MAX); if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) return false; @@ -712,8 +712,8 @@ // If the function has a buffer size argument, set ArgNum. unsigned ArgNum = InvalidArgIndex; unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: