Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -34,6 +34,28 @@ using namespace taint; namespace { +/// A struct to store tainted argument and taint type as a pair in the program +/// state. +struct TaintArgTypePair { + unsigned Arg; + TaintTagType TagType; + + bool operator==(const TaintArgTypePair &X) const { + return Arg == X.Arg && TagType == X.TagType; + } + + bool operator<(const TaintArgTypePair &X) const { + if (Arg != X.Arg) + return Arg < X.Arg; + return TagType < X.TagType; + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(Arg); + ID.AddInteger(TagType); + } +}; + class GenericTaintChecker : public Checker, check::PreStmt> { public: @@ -46,8 +68,8 @@ void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; - void printState(raw_ostream &Out, ProgramStateRef State, - const char *NL, const char *Sep) const override; + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; using ArgVector = SmallVector; @@ -89,10 +111,16 @@ /// Catch taint related bugs. Check if tainted data is passed to a /// system call etc. - bool checkPre(const CallExpr *CE, CheckerContext &C) const; + bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C) const; /// Add taint sources on a pre-visit. - void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) const; + + /// Mark filter's arguments not tainted on a pre-visit. + bool addFiltersPre(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; /// Propagate taint generated at pre-visit. bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; @@ -294,7 +322,7 @@ /// to the call post-visit. The values are unsigned integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. -REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, TaintArgTypePair) void GenericTaintChecker::getConfiguration(StringRef ConfigFile) { if (ConfigFile.trim().empty()) @@ -443,14 +471,25 @@ void GenericTaintChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || FDecl->getKind() != Decl::Function) + return; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return; + // Check for taintedness related errors first: system call, uncontrolled // format string, tainted buffer size. - if (checkPre(CE, C)) + if (checkPre(CE, FDecl, Name, C)) return; // Marks the function's arguments and/or return value tainted if it present in // the list. - addSourcesPre(CE, C); + if (addSourcesPre(CE, FDecl, Name, C)) + return; + + addFiltersPre(CE, Name, C); } void GenericTaintChecker::checkPostStmt(const CallExpr *CE, @@ -465,31 +504,46 @@ printTaint(State, Out, NL, Sep); } -void GenericTaintChecker::addSourcesPre(const CallExpr *CE, +bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, + const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) const { - ProgramStateRef State = nullptr; - const FunctionDecl *FDecl = C.getCalleeDecl(CE); - if (!FDecl || FDecl->getKind() != Decl::Function) - return; - - StringRef Name = C.getCalleeName(FDecl); - if (Name.empty()) - return; - // First, try generating a propagation rule for this function. TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); if (!Rule.isNull()) { - State = Rule.process(CE, C); - if (!State) - return; - C.addTransition(State); - return; + ProgramStateRef State = Rule.process(CE, C); + if (State) { + C.addTransition(State); + return true; + } } + return false; +} - if (!State) - return; - C.addTransition(State); +bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + auto It = CustomFilters.find(Name); + if (It == CustomFilters.end()) + return false; + + ProgramStateRef State = C.getState(); + const GenericTaintChecker::ArgVector &Args = It->getValue(); + for (unsigned ArgNum : Args) { + if (ArgNum >= CE->getNumArgs()) { + llvm::errs() << "Skip out of bound filter Arg: " << Name << ":" << ArgNum + << '\n'; + continue; + } + + State = State->add({ArgNum, TaintTagNotTainted}); + } + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -503,10 +557,12 @@ if (TaintArgs.isEmpty()) return false; - for (unsigned ArgNum : TaintArgs) { + for (auto ArgTypePair : TaintArgs) { + unsigned ArgNum = ArgTypePair.Arg; + TaintTagType TagType = ArgTypePair.TagType; // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { - State = addTaint(State, CE, C.getLocationContext()); + State = addTaint(State, CE, C.getLocationContext(), TagType); continue; } @@ -517,7 +573,7 @@ const Expr *Arg = CE->getArg(ArgNum); Optional V = getPointedToSVal(C, Arg); if (V) - State = addTaint(State, *V); + State = addTaint(State, *V, TagType); } // Clear up the taint info from the state. @@ -531,19 +587,12 @@ } bool GenericTaintChecker::checkPre(const CallExpr *CE, + const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; - const FunctionDecl *FDecl = C.getCalleeDecl(CE); - if (!FDecl || FDecl->getKind() != Decl::Function) - return false; - - StringRef Name = C.getCalleeName(FDecl); - if (Name.empty()) - return false; - if (checkSystemCall(CE, Name, C)) return true; @@ -618,7 +667,8 @@ for (unsigned ArgNum : DstArgs) { // Should mark the return value? if (ArgNum == ReturnValueIndex) { - State = State->add(ReturnValueIndex); + State = + State->add({ReturnValueIndex, TaintTagGeneric}); continue; } @@ -630,7 +680,7 @@ } // Mark the given argument. - State = State->add(ArgNum); + State = State->add({ArgNum, TaintTagGeneric}); } // Mark all variadic arguments tainted if present. @@ -646,7 +696,7 @@ QualType PType = ArgTy->getPointeeType(); if ((!PType.isNull() && !PType.isConstQualified()) || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) - State = State->add(i); + State = State->add({i, TaintTagGeneric}); } } @@ -836,7 +886,7 @@ if (It == CustomSinks.end()) return false; - const GenericTaintChecker::ArgVector& Args = It->getValue(); + const GenericTaintChecker::ArgVector &Args = It->getValue(); for (unsigned ArgNum : Args) { if (ArgNum >= CE->getNumArgs()) { llvm::errs() << "Skip out of bound sink Arg: " << Name << ":" << ArgNum Index: lib/StaticAnalyzer/Checkers/Taint.h =================================================================== --- lib/StaticAnalyzer/Checkers/Taint.h +++ lib/StaticAnalyzer/Checkers/Taint.h @@ -24,37 +24,35 @@ /// taint. using TaintTagType = unsigned; -static constexpr TaintTagType TaintTagGeneric = 0; +static constexpr TaintTagType TaintTagNotTainted = 0; +static constexpr TaintTagType TaintTagGeneric = 1; /// Create a new state in which the value of the statement is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the value is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, SVal V, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the symbol is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, SymbolRef Sym, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the pointer represented by the region /// is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, const MemRegion *R, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, + const MemRegion *R, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in a which a sub-region of a given symbol is tainted. /// This might be necessary when referring to regions that can not have an /// individual symbol, e.g. if they are represented by the default binding of /// a LazyCompoundVal. -LLVM_NODISCARD ProgramStateRef -addPartialTaint(ProgramStateRef State, - SymbolRef ParentSym, const SubRegion *SubRegion, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addPartialTaint( + ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, + TaintTagType Kind = TaintTagGeneric); /// Check if the statement has a tainted value in the given state. bool isTainted(ProgramStateRef State, const Stmt *S, @@ -99,4 +97,3 @@ } // namespace clang #endif - Index: lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/Taint.cpp +++ lib/StaticAnalyzer/Checkers/Taint.cpp @@ -37,9 +37,7 @@ Out << I.first << " : " << I.second << NL; } -void dumpTaint(ProgramStateRef State) { - printTaint(State, llvm::errs()); -} +void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); } ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, @@ -64,8 +62,8 @@ // region of the parent region. if (auto LCV = V.getAs()) { if (Optional binding = - State->getStateManager().getStoreManager() - .getDefaultBinding(*LCV)) { + State->getStateManager().getStoreManager().getDefaultBinding( + *LCV)) { if (SymbolRef Sym = binding->getAsSymbol()) return addPartialTaint(State, Sym, LCV->getRegion(), Kind); } @@ -157,12 +155,13 @@ // Traverse all the symbols this symbol depends on to see if any are tainted. for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), - SE = Sym->symbol_end(); SI != SE; ++SI) { + SE = Sym->symbol_end(); + SI != SE; ++SI) { if (!isa(*SI)) continue; if (const TaintTagType *Tag = State->get(*SI)) { - if (*Tag == Kind) + if (*Tag != TaintTagNotTainted && *Tag == Kind) return true; } Index: test/Analysis/Inputs/taint-generic-config.yaml =================================================================== --- test/Analysis/Inputs/taint-generic-config.yaml +++ test/Analysis/Inputs/taint-generic-config.yaml @@ -36,8 +36,8 @@ # A list of filter functions Filters: # int x; // x is tainted - # myFilter(&x); // x is not tainted anymore - - Name: myFilter + # myFilter1(&x); // x is not tainted anymore + - Name: myFilter1 Args: [0] # A list of sink functions Index: test/Analysis/taint-generic.c =================================================================== --- test/Analysis/taint-generic.c +++ test/Analysis/taint-generic.c @@ -296,13 +296,15 @@ *(volatile int *) 0; // no-warning } - // Test configuration int mySource1(); void mySource2(int*); void myScanf(const char*, ...); int myPropagator(int, int*); int mySnprintf(char*, size_t, const char*, ...); +void myFilter1(int* x) {} +void myFilter2(const int*); +void myFilter3(int); void mySink(int, int, int); void testConfigurationSources1() { @@ -329,6 +331,24 @@ Buffer[y] = 1; // expected-warning {{Out of bound memory access }} } +void testConfigurationFilter1() { + int x = mySource1(); + myFilter1(&x); + Buffer[x] = 1; // no-warning +} + +void testConfigurationFilter2() { + int x = mySource1(); + myFilter2(&x); // Pass by const int* + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +} + +void testConfigurationFilter3() { + int x = mySource1(); + myFilter3(x); // Pass by value + Buffer[x] = 1; // expected-warning {{Out of bound memory access }} +} + void testConfigurationSinks() { int x = mySource1(); mySink(x, 1, 2); // expected-warning {{Untrusted data is passed to a user defined sink}}