diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h @@ -84,6 +84,8 @@ return Eng.getContext(); } + const ASTContext &getASTContext() const { return Eng.getContext(); } + const LangOptions &getLangOpts() const { return Eng.getContext().getLangOpts(); } diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -38,6 +38,8 @@ namespace { +class GenericTaintChecker; + /// Check for CWE-134: Uncontrolled Format String. constexpr llvm::StringLiteral MsgUncontrolledFormatString = "Untrusted data is used as a format string " @@ -62,23 +64,26 @@ "Untrusted data is passed to a user-defined sink"; using ArgIdxTy = int; -using ArgVecTy = llvm::SmallVector; +using ArgVecTy = llvm::SmallVector; -constexpr int InvalidArgIndex{-2}; /// Denotes the return value. -constexpr int ReturnValueIndex{-1}; +constexpr ArgIdxTy ReturnValueIndex{-1}; + +static ArgIdxTy fromArgumentCount(unsigned Count) { + assert(Count <= + static_cast(std::numeric_limits::max()) && + "ArgIdxTy is not large enough to represent the number of arguments."); + return Count; +} /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. -bool isStdin(const Expr *E, CheckerContext &C) { - ProgramStateRef State = C.getState(); - SVal Val = C.getSVal(E); - - // stdin is a pointer, so it would be a region. - const MemRegion *MemReg = Val.getAsRegion(); +/// FIXME: Move this to Taint.cpp. +bool isStdin(SVal Val, const ASTContext &ACtx) { + // FIXME: What if Val is NonParamVarRegion? // The region should be symbolic, we do not know it's value. - const auto *SymReg = dyn_cast_or_null(MemReg); + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); if (!SymReg) return false; @@ -86,7 +91,7 @@ const auto *Sm = dyn_cast(SymReg->getSymbol()); if (!Sm) return false; - const auto *DeclReg = dyn_cast_or_null(Sm->getRegion()); + const auto *DeclReg = dyn_cast(Sm->getRegion()); if (!DeclReg) return false; @@ -94,68 +99,58 @@ // variable named stdin with the proper type. if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { D = D->getCanonicalDecl(); + // FIXME: This should look for an exact match. if (D->getName().contains("stdin") && D->isExternC()) { - const auto *PtrTy = dyn_cast(D->getType().getTypePtr()); - if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == - C.getASTContext().getFILEType().getCanonicalType()) - return true; + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) + return Ty->getPointeeType() == FILETy; } } return false; } -/// Given a pointer argument, return the value it points to. -Optional getPointeeOf(CheckerContext &C, const Expr *Arg) { - ProgramStateRef State = C.getState(); - SVal AddrVal = C.getSVal(Arg->IgnoreParens()); - if (AddrVal.isUnknownOrUndef()) - return None; - - Optional AddrLoc = AddrVal.getAs(); - if (!AddrLoc) - return None; - - QualType ArgTy = Arg->getType().getCanonicalType(); - if (!ArgTy->isPointerType()) - return State->getSVal(*AddrLoc); - - QualType ValTy = ArgTy->getPointeeType(); +SVal getPointeeOf(const CheckerContext &C, Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) + return C.getState()->getSVal(LValue); // Do not dereference void pointers. Treat them as byte pointers instead. // FIXME: we might want to consider more than just the first byte. - if (ValTy->isVoidType()) - ValTy = C.getASTContext().CharTy; + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} - return State->getSVal(*AddrLoc, ValTy); +/// Given a pointer/reference argument, return the value it refers to. +Optional getPointeeOf(const CheckerContext &C, SVal Arg) { + if (auto LValue = Arg.getAs()) + return getPointeeOf(C, *LValue); + return None; } /// Given a pointer, return the SVal of its pointee or if it is tainted, /// otherwise return the pointer's SVal if tainted. -Optional getTaintedPointeeOrPointer(CheckerContext &C, const Expr *Arg) { - assert(Arg); - // Check for taint. - ProgramStateRef State = C.getState(); - Optional PointedToSVal = getPointeeOf(C, Arg); +/// Also considers stdin as a taint source. +Optional getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) { + const ProgramStateRef State = C.getState(); - if (PointedToSVal && isTainted(State, *PointedToSVal)) - return PointedToSVal; + if (auto Pointee = getPointeeOf(C, Arg)) + if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; + return Pointee; - if (isTainted(State, Arg, C.getLocationContext())) - return {C.getSVal(Arg)}; + if (isTainted(State, Arg)) + return Arg; - return {}; + // FIXME: This should be done by the isTainted() API. + if (isStdin(Arg, C.getASTContext())) + return Arg; + + return None; } bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, CheckerContext &C) { - if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) - return true; - - if (!E->getType().getTypePtr()->isPointerType()) - return false; - - Optional V = getPointeeOf(C, E); - return (V && isTainted(State, *V)); + return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue(); } /// ArgSet is used to describe arguments relevant for taint detection or @@ -164,9 +159,7 @@ class ArgSet { public: ArgSet() = default; - ArgSet(ArgVecTy &&DiscreteArgs) - : DiscreteArgs(DiscreteArgs), VariadicIndex(None) {} - ArgSet(ArgVecTy &&DiscreteArgs, ArgIdxTy VariadicIndex) + ArgSet(ArgVecTy &&DiscreteArgs, Optional VariadicIndex = None) : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {} bool contains(ArgIdxTy ArgIdx) const { @@ -254,7 +247,8 @@ /// Process a function which could either be a taint source, a taint sink, a /// taint filter or a taint propagator. - void process(const CallEvent &Call, CheckerContext &C) const; + void process(const GenericTaintChecker &Checker, const CallEvent &Call, + CheckerContext &C) const; /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { @@ -290,7 +284,7 @@ ArgVecTy SrcArgs; ArgVecTy DstArgs; VariadicType VarType; - int VarIndex; + ArgIdxTy VarIndex; }; std::vector Propagations; @@ -315,14 +309,18 @@ private: using NamePartTy = llvm::SmallVector, 2>; - using CallDescAPITy = llvm::SmallVector; /// Validate part of the configuration, which contains a list of argument /// indexes. void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; + template static NamePartTy parseNameParts(const Config &C); + + // Takes the config and creates a CallDescription for it and associates a Rule + // with that. template - auto parseNameParts(const Config &C) const -> NamePartTy; + static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, + RulesContTy &Rules); void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, RulesContTy &Rules) const; @@ -353,12 +351,7 @@ CheckerContext &C) const; private: - mutable std::unique_ptr BT; - void initBugType() const { - if (!BT) - BT = std::make_unique(this, "Use of Untrusted Data", - "Untrusted Data"); - } + const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; bool checkUncontrolledFormatString(const CallEvent &Call, CheckerContext &C) const; @@ -377,8 +370,8 @@ // TODO: Remove separation to simplify matching logic once CallDescriptions // are more expressive. - mutable Optional GlobalCTaintRules; - mutable Optional TaintRules; + mutable Optional StaticTaintRules; + mutable Optional DynamicTaintRules; }; } // end of anonymous namespace @@ -419,9 +412,8 @@ IO.mapOptional("Scope", Propagation.Scope); IO.mapOptional("SrcArgs", Propagation.SrcArgs); IO.mapOptional("DstArgs", Propagation.DstArgs); - IO.mapOptional("VariadicType", Propagation.VarType, - TaintConfiguration::VariadicType::None); - IO.mapOptional("VariadicIndex", Propagation.VarIndex, InvalidArgIndex); + IO.mapOptional("VariadicType", Propagation.VarType); + IO.mapOptional("VariadicIndex", Propagation.VarIndex); } }; @@ -443,7 +435,7 @@ void GenericTaintRuleParser::validateArgVector(const std::string &Option, const ArgVecTy &Args) const { - for (int Arg : Args) { + for (ArgIdxTy Arg : Args) { if (Arg < ReturnValueIndex) { Mgr.reportInvalidCheckerOptionValue( Mgr.getChecker(), Option, @@ -453,8 +445,8 @@ } template -auto GenericTaintRuleParser::parseNameParts(const Config &C) const - -> NamePartTy { +GenericTaintRuleParser::NamePartTy +GenericTaintRuleParser::parseNameParts(const Config &C) { NamePartTy NameParts; if (!C.Scope.empty()) { // If the Scope argument contains multiple "::" parts, those are considered @@ -468,42 +460,31 @@ return NameParts; } +template +void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, + GenericTaintRule &&Rule, + RulesContTy &Rules) { + NamePartTy NameParts = parseNameParts(C); + llvm::SmallVector CallDescParts{NameParts.size()}; + llvm::transform(NameParts, CallDescParts.begin(), + [](SmallString<32> &S) { return S.c_str(); }); + Rules.template emplace_back(CallDescParts, std::move(Rule)); +} + void GenericTaintRuleParser::parseConfig(const std::string &Option, TaintConfiguration::Sink &&S, RulesContTy &Rules) const { validateArgVector(Option, S.SinkArgs); - - // The ArrayRef API of CallDescription makes it necessary to - // first get an owning string representation, and then get the underlying - // C-string representation. - // FIXME: Once CallDescription supports the (possibly move) construction via - // ArrayRef, ArrayRef or from a range, this can be - // simplified. - NamePartTy NameParts{parseNameParts(S)}; - CallDescAPITy CallDescParts{NameParts.size()}; - llvm::transform(NameParts, CallDescParts.begin(), - [](auto &&P) { return P.c_str(); }); - Rules.template emplace_back(CallDescParts, - GenericTaintRule::Sink(std::move(S.SinkArgs))); + consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)), + Rules); } void GenericTaintRuleParser::parseConfig(const std::string &Option, TaintConfiguration::Filter &&S, RulesContTy &Rules) const { validateArgVector(Option, S.FilterArgs); - - // The ArrayRef API of CallDescription makes it necessary to - // first get an owning string representation, and then get the underlying - // C-string representation. - // FIXME: Once CallDescription supports the (possibly move) construction via - // ArrayRef, ArrayRef or from a range, this can be - // simplified. - NamePartTy NameParts{parseNameParts(S)}; - CallDescAPITy CallDescParts{NameParts.size()}; - llvm::transform(NameParts, CallDescParts.begin(), - [](auto &&P) { return P.c_str(); }); - Rules.template emplace_back( - CallDescParts, GenericTaintRule::Filter(std::move(S.FilterArgs))); + consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)), + Rules); } void GenericTaintRuleParser::parseConfig(const std::string &Option, @@ -513,25 +494,13 @@ validateArgVector(Option, P.DstArgs); bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; + Optional JustVarIndex = P.VarIndex; - ArgSet SrcDesc = IsSrcVariadic ? ArgSet(std::move(P.SrcArgs), P.VarIndex) - : ArgSet(std::move(P.SrcArgs)); - ArgSet DstDesc = IsDstVariadic ? ArgSet(std::move(P.DstArgs), P.VarIndex) - : ArgSet(std::move(P.DstArgs)); - - // The ArrayRef API of CallDescription makes it necessary to - // first get an owning string representation, and then get the underlying - // C-string representation. - // FIXME: Once CallDescription supports the (possibly move) construction via - // ArrayRef, ArrayRef or from a range, this can be - // simplified. - NamePartTy NameParts{parseNameParts(P)}; - CallDescAPITy CallDescParts{NameParts.size()}; - llvm::transform(NameParts, CallDescParts.begin(), - [](auto &&P) { return P.c_str(); }); - Rules.template emplace_back( - CallDescParts, - GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc))); + ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None); + ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None); + + consumeRulesFromConfig( + P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); } GenericTaintRuleParser::RulesContTy @@ -556,18 +525,18 @@ // Check for exact name match for functions without builtin substitutes. // Use qualified name, because these are C functions without namespace. - if (GlobalCTaintRules || TaintRules) + if (StaticTaintRules || DynamicTaintRules) return; using RulesConstructionTy = std::vector>; using TR = GenericTaintRule; - const auto &BI = C.getASTContext().BuiltinInfo; + const Builtin::Context &BI = C.getASTContext().BuiltinInfo; RulesConstructionTy GlobalCRules{ // Sources - {{"fdopen"}, TR::Source(ArgSet{{ReturnValueIndex}})}, + {{"fdopen"}, TR::Source({{ReturnValueIndex}})}, {{"fopen"}, TR::Source({{ReturnValueIndex}})}, {{"freopen"}, TR::Source({{ReturnValueIndex}})}, {{"getch"}, TR::Source({{ReturnValueIndex}})}, @@ -660,8 +629,8 @@ GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})}); } - GlobalCTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), - std::make_move_iterator(GlobalCRules.end())); + StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), + std::make_move_iterator(GlobalCRules.end())); // User-provided taint configuration. CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); @@ -673,32 +642,27 @@ llvm::Optional Config = getConfiguration(*Mgr, this, Option, ConfigFile); if (!Config) { - TaintRules = RuleLookupTy{}; + DynamicTaintRules = RuleLookupTy{}; return; } GenericTaintRuleParser::RulesContTy Rules{ ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))}; - TaintRules.emplace(std::make_move_iterator(Rules.begin()), - std::make_move_iterator(Rules.end())); + DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), + std::make_move_iterator(Rules.end())); } void GenericTaintChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { - initTaintRules(C); - const GenericTaintRule *MaybeGlobalCMatch = GlobalCTaintRules->lookup(Call); - const GenericTaintRule *MaybeMatch = - TaintRules ? TaintRules->lookup(Call) : nullptr; - bool ConsiderGlobalCMatch = MaybeGlobalCMatch && Call.isGlobalCFunction(); - - if (ConsiderGlobalCMatch) - MaybeGlobalCMatch->process(Call, C); - - if (!ConsiderGlobalCMatch && MaybeMatch) - MaybeMatch->process(Call, C); + // FIXME: this should be much simpler. + if (const auto *Rule = + Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) + Rule->process(*this, Call, C); + else if (const auto *Rule = DynamicTaintRules->lookup(Call)) + Rule->process(*this, Call, C); // FIXME: These edge cases are to be eliminated from here eventually. // @@ -725,11 +689,6 @@ if (TaintArgs.isEmpty()) return; - assert(Call.getNumArgs() <= - static_cast(std::numeric_limits::max()) && - "ArgIdxTy is not large enough to represent the number of arguments."); - ArgIdxTy CallNumArgs = Call.getNumArgs(); - for (ArgIdxTy ArgNum : TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { @@ -739,21 +698,13 @@ // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (CallNumArgs < (ArgNum + 1)) - return; - const Expr *Arg = Call.getArgExpr(ArgNum); - Optional V = getPointeeOf(C, Arg); - if (V) + if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) State = addTaint(State, *V); } // Clear up the taint info from the state. State = State->remove(); - - if (State != C.getState()) { - C.addTransition(State); - return; - } + C.addTransition(State); } void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, @@ -761,38 +712,30 @@ printTaint(State, Out, NL, Sep); } -void GenericTaintRule::process(const CallEvent &Call, CheckerContext &C) const { +void GenericTaintRule::process(const GenericTaintChecker &Checker, + const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); - - assert(Call.getNumArgs() <= - static_cast(std::numeric_limits::max()) && - "ArgIdxTy is not large enough to represent the number of arguments."); + const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); /// Iterate every call argument, and get their corresponding Expr and SVal. - const auto ForEachCallArg = [&C, &Call](auto &&F) { - for (ArgIdxTy I = ReturnValueIndex, N = Call.getNumArgs(); I < N; ++I) { + const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { + for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { const Expr *E = GetArgExpr(I, Call); - assert(E); - SVal S = C.getSVal(E); - F(I, E, S); + Fun(I, E, C.getSVal(E)); } }; /// Check for taint sinks. - ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal) { + ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) - C.getAnalysisManager() - .getCheckerManager() - ->getChecker() - ->generateReportIfTainted(E, SinkMsg ? *SinkMsg : MsgCustomSink, C); + Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C); }); /// Check for taint filters. ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { if (FilterArgs.contains(I)) { State = removeTaint(State, S); - Optional P = getPointeeOf(C, E); - if (P) + if (auto P = getPointeeOf(C, S)) State = removeTaint(State, *P); } }); @@ -810,37 +753,31 @@ if (!IsMatching) return; - /// Propagate taint where it is necessary. - // TODO: Currently, we might lose precision here: we always mark a return - // value as tainted even if it's just a pointer, pointing to tainted data. - ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { - if (PropDstArgs.contains(I)) - State = State->add(I); + const auto WouldEscape = [](SVal V, QualType Ty) -> bool { + if (!V.getAs()) + return false; - Optional MaybeValueToTaint = [E, &C, S]() -> Optional { - if (!E) - return {}; + const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); + const bool IsNonConstPtr = + Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); - const QualType ArgTy = E->getType(); - - const bool IsNonConstRef = - ArgTy->isReferenceType() && !ArgTy.isConstQualified(); - const bool IsNonConstPtr = - ArgTy->isPointerType() && !ArgTy->getPointeeType().isConstQualified(); - - if (IsNonConstRef) - return S; - if (IsNonConstPtr) - return getPointeeOf(C, E); - return {}; - }(); + return IsNonConstRef || IsNonConstPtr; + }; - if (MaybeValueToTaint.hasValue()) - State = State->add(I); - }); + /// Propagate taint where it is necessary. + ForEachCallArg( + [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) { + if (PropDstArgs.contains(I)) + State = State->add(I); + + // TODO: We should traverse all reachable memory regions via the + // escaping parameter. Instead of doing that we simply mark only the + // referred memory region as tainted. + if (WouldEscape(V, E->getType())) + State = State->add(I); + }); - if (State != C.getState()) - C.addTransition(State); + C.addTransition(State); } bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { @@ -852,15 +789,14 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); - Optional TaintedSVal{getTaintedPointeeOrPointer(C, E)}; + Optional TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; if (!TaintedSVal) return false; // Generate diagnostic. if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - initBugType(); - auto report = std::make_unique(*BT, Msg, N); + auto report = std::make_unique(BT, Msg, N); report->addRange(E->getSourceRange()); report->addVisitor(std::make_unique(*TaintedSVal)); C.emitReport(std::move(report)); @@ -888,10 +824,7 @@ if (!FDecl) return false; - assert(Call.getNumArgs() <= - static_cast(std::numeric_limits::max()) && - "ArgIdxTy is not large enough to represent the number of arguments."); - ArgIdxTy CallNumArgs = Call.getNumArgs(); + const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); for (const auto *Format : FDecl->specific_attrs()) { ArgNum = Format->getFormatIdx() - 1; @@ -905,7 +838,7 @@ bool GenericTaintChecker::checkUncontrolledFormatString( const CallEvent &Call, CheckerContext &C) const { // Check if the function contains a format string argument. - int ArgNum = 0; + ArgIdxTy ArgNum = 0; if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) return false;