diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -98,13 +98,20 @@ } /// Catch taint related bugs. Check if tainted data is passed to a - /// system call etc. - bool checkPre(const CallExpr *CE, CheckerContext &C) const; + /// system call etc. Returns true on matching. + bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C) const; - /// Add taint sources on a pre-visit. - void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + /// Add taint sources on a pre-visit. Returns true on matching. + bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) const; - /// Propagate taint generated at pre-visit. + /// Mark filter's arguments not tainted on a pre-visit. Returns true on + /// matching. + bool addFiltersPre(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Propagate taint generated at pre-visit. Returns true on matching. bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; /// Check if the region the expression evaluates to is the standard input, @@ -442,14 +449,26 @@ void GenericTaintChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const { + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + // Skip callees that are missing or not plain Decl::Function declarations. + if (!FDecl || FDecl->getKind() != Decl::Function) + return; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return; + // Check for taintedness related errors first: system call, uncontrolled // format string, tainted buffer size. - if (checkPre(CE, C)) + if (checkPre(CE, FDecl, Name, C)) return; // Marks the function's arguments and/or return value tainted if it present in // the list. 
- addSourcesPre(CE, C); + if (addSourcesPre(CE, FDecl, Name, C)) + return; + + addFiltersPre(CE, Name, C); } void GenericTaintChecker::checkPostStmt(const CallExpr *CE, @@ -464,31 +483,46 @@ printTaint(State, Out, NL, Sep); } -void GenericTaintChecker::addSourcesPre(const CallExpr *CE, +bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, + const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) const { - ProgramStateRef State = nullptr; - const FunctionDecl *FDecl = C.getCalleeDecl(CE); - if (!FDecl || FDecl->getKind() != Decl::Function) - return; - - StringRef Name = C.getCalleeName(FDecl); - if (Name.empty()) - return; - // First, try generating a propagation rule for this function. TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( this->CustomPropagations, FDecl, Name, C); if (!Rule.isNull()) { - State = Rule.process(CE, C); - if (!State) - return; - C.addTransition(State); - return; + ProgramStateRef State = Rule.process(CE, C); + if (State) { + C.addTransition(State); + return true; + } } + return false; +} - if (!State) - return; - C.addTransition(State); +bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + auto It = CustomFilters.find(Name); + if (It == CustomFilters.end()) + return false; + + ProgramStateRef State = C.getState(); + const ArgVector &Args = It->getValue(); + for (unsigned ArgNum : Args) { + if (ArgNum >= CE->getNumArgs()) + continue; + + const Expr *Arg = CE->getArg(ArgNum); + Optional<SVal> V = getPointedToSVal(C, Arg); + if (V) + State = removeTaint(State, *V); + } + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -530,19 +564,12 @@ } bool GenericTaintChecker::checkPre(const CallExpr *CE, + const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; - const FunctionDecl *FDecl = 
C.getCalleeDecl(CE); - if (!FDecl || FDecl->getKind() != Decl::Function) - return false; - - StringRef Name = C.getCalleeName(FDecl); - if (Name.empty()) - return false; - if (checkSystemCall(CE, Name, C)) return true; diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.h b/clang/lib/StaticAnalyzer/Checkers/Taint.h --- a/clang/lib/StaticAnalyzer/Checkers/Taint.h +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.h @@ -27,34 +27,39 @@ static constexpr TaintTagType TaintTagGeneric = 0; /// Create a new state in which the value of the statement is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the value is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, SVal V, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the symbol is marked as tainted. -LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, SymbolRef Sym, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); /// Create a new state in which the pointer represented by the region /// is marked as tainted. 
-LLVM_NODISCARD ProgramStateRef -addTaint(ProgramStateRef State, const MemRegion *R, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, + const MemRegion *R, + TaintTagType Kind = TaintTagGeneric); + +LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V); + +LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, + const MemRegion *R); + +LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, + SymbolRef Sym); /// Create a new state in a which a sub-region of a given symbol is tainted. /// This might be necessary when referring to regions that can not have an /// individual symbol, e.g. if they are represented by the default binding of /// a LazyCompoundVal. -LLVM_NODISCARD ProgramStateRef -addPartialTaint(ProgramStateRef State, - SymbolRef ParentSym, const SubRegion *SubRegion, - TaintTagType Kind = TaintTagGeneric); +LLVM_NODISCARD ProgramStateRef addPartialTaint( + ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, + TaintTagType Kind = TaintTagGeneric); /// Check if the statement has a tainted value in the given state. bool isTainted(ProgramStateRef State, const Stmt *S, @@ -99,4 +104,3 @@ } // namespace clang #endif - diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -37,9 +37,7 @@ Out << I.first << " : " << I.second << NL; } -void dumpTaint(ProgramStateRef State) { - printTaint(State, llvm::errs()); -} +void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); } ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, @@ -64,8 +62,8 @@ // region of the parent region. 
if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { if (Optional<SVal> binding = - State->getStateManager().getStoreManager() - .getDefaultBinding(*LCV)) { + State->getStateManager().getStoreManager().getDefaultBinding( + *LCV)) { if (SymbolRef Sym = binding->getAsSymbol()) return addPartialTaint(State, Sym, LCV->getRegion(), Kind); } @@ -94,6 +92,32 @@ return NewState; } +ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { + SymbolRef Sym = V.getAsSymbol(); + if (Sym) + return removeTaint(State, Sym); + + const MemRegion *R = V.getAsRegion(); + return removeTaint(State, R); +} + +ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { + if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) + return removeTaint(State, SR->getSymbol()); + return State; +} + +ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { + // If this is a symbol cast, strip the cast before removing the taint. Taint + // is cast agnostic. + while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) + Sym = SC->getOperand(); + + ProgramStateRef NewState = State->remove<TaintMap>(Sym); + assert(NewState); + return NewState; +} + ProgramStateRef taint::addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, @@ -157,7 +181,8 @@ // Traverse all the symbols this symbol depends on to see if any are tainted. 
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), - SE = Sym->symbol_end(); SI != SE; ++SI) { + SE = Sym->symbol_end(); + SI != SE; ++SI) { if (!isa<SymbolData>(*SI)) continue; diff --git a/clang/test/Analysis/Inputs/taint-generic-config.yaml b/clang/test/Analysis/Inputs/taint-generic-config.yaml --- a/clang/test/Analysis/Inputs/taint-generic-config.yaml +++ b/clang/test/Analysis/Inputs/taint-generic-config.yaml @@ -36,8 +36,8 @@ # A list of filter functions Filters: # int x; // x is tainted - # myFilter(&x); // x is not tainted anymore - - Name: myFilter + # isOutOfRange(&x); // x is not tainted anymore + - Name: isOutOfRange Args: [0] # A list of sink functions diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -56,6 +56,8 @@ extern FILE *stdin; #endif +#define bool _Bool + int fscanf(FILE *restrict stream, const char *restrict format, ...); int sprintf(char *str, const char *format, ...); void setproctitle(const char *fmt, ...); @@ -346,6 +348,7 @@ void myScanf(const char*, ...); int myPropagator(int, int*); int mySnprintf(char*, size_t, const char*, ...); +bool isOutOfRange(const int*); void mySink(int, int, int); void testConfigurationSources1() { @@ -372,6 +375,13 @@ Buffer[y] = 1; // expected-warning {{Out of bound memory access }} } +void testConfigurationFilter() { + int x = mySource1(); + if (isOutOfRange(&x)) // the filter function + return; + Buffer[x] = 1; // no-warning +} + void testConfigurationSinks() { int x = mySource1(); mySink(x, 1, 2);