Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/Taint.h +++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h @@ -13,7 +13,9 @@ #ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H #define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" namespace clang { @@ -23,27 +25,46 @@ /// The type of taint, which helps to differentiate between different types of /// taint. using TaintTagType = unsigned; +using TaintFlowType = int; static constexpr TaintTagType TaintTagGeneric = 0; +static constexpr TaintFlowType UninitializedFlow = -1; + +struct TaintData { + TaintTagType Type; + TaintFlowType Flow; + TaintData() : Type(TaintTagGeneric), Flow(UninitializedFlow){}; + TaintData(TaintFlowType f) : Type(TaintTagGeneric), Flow(f){}; + TaintData(TaintTagType t, TaintFlowType f) : Type(t), Flow(f){}; + bool operator==(const TaintData &X) const { return Flow == X.Flow; } + int operator<(const TaintData &X) const { return Flow < X.Flow; } + void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Flow); } +}; + +static inline raw_ostream &operator<<(llvm::raw_ostream &Out, + const clang::ento::taint::TaintData t) { + Out << "Type:" << t.Type << " Flow:" << t.Flow << "\n"; + return Out; +} /// Create a new state in which the value of the statement is marked as tainted. [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric); + TaintData Data = TaintData()); /// Create a new state in which the value is marked as tainted. [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, SVal V, - TaintTagType Kind = TaintTagGeneric); + TaintData Data = TaintData()); /// Create a new state in which the symbol is marked as tainted. [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym, - TaintTagType Kind = TaintTagGeneric); + TaintData Data = TaintData()); /// Create a new state in which the pointer represented by the region /// is marked as tainted. [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, const MemRegion *R, - TaintTagType Kind = TaintTagGeneric); + TaintData Data = TaintData()); [[nodiscard]] ProgramStateRef removeTaint(ProgramStateRef State, SVal V); @@ -56,10 +77,38 @@ /// This might be necessary when referring to regions that can not have an /// individual symbol, e.g. if they are represented by the default binding of /// a LazyCompoundVal. -[[nodiscard]] ProgramStateRef -addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, - const SubRegion *SubRegion, - TaintTagType Kind = TaintTagGeneric); +[[nodiscard]] ProgramStateRef addPartialTaint(ProgramStateRef State, + SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintData = TaintData()); + +std::optional getTaintData(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx); + +/// Get the taint data for a value if tainted in the given state. +std::optional getTaintData(ProgramStateRef State, SVal V); + +/// Get the taint data for symbol if tainted in the given state. +std::optional getTaintData(ProgramStateRef State, SymbolRef Sym); + +/// Get the Taint data for a pointer represented by the region +// if tainted in the given state. +std::optional getTaintData(ProgramStateRef State, const MemRegion *Reg); + +/// Check if the region the expression evaluates to is the standard input, +/// and thus, is tainted. +bool isStdin(SVal Val, const ASTContext &ACtx); + +/// Returns the TaintData for a pointed value (if tainted), otherwise +/// the TaintData of the pointer itself. +std::optional getTaintDataPointeeOrPointer(const CheckerContext &C, + SVal Arg); +std::optional getTaintDataPointeeOrPointer(const Expr *E, + const ProgramStateRef &State, + CheckerContext &C); + +SVal getPointeeOf(const CheckerContext &C, Loc LValue); +std::optional getPointeeOf(const CheckerContext &C, SVal Arg); /// Check if the statement has a tainted value in the given state. bool isTainted(ProgramStateRef State, const Stmt *S, @@ -84,19 +133,17 @@ LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State); -/// The bug visitor prints a diagnostic message at the location where a given -/// variable was tainted. -class TaintBugVisitor final : public BugReporterVisitor { -private: - const SVal V; - +class TaintBugReport : public clang::ento::PathSensitiveBugReport { public: - TaintBugVisitor(const SVal V) : V(V) {} - void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } - - PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) override; + TaintBugReport(const BugType &bt, StringRef desc, + const ExplodedNode *errorNode, TaintData T) + : clang::ento::PathSensitiveBugReport(bt, desc, errorNode, + Kind::TaintPathSensitive), + taint_data(T){}; + static bool classof(const BugReport *R) { + return R->getKind() == Kind::TaintPathSensitive; + } + TaintData taint_data; }; } // namespace taint Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -118,7 +118,7 @@ /// individual bug reports. class BugReport { public: - enum class Kind { Basic, PathSensitive }; + enum class Kind { Basic, PathSensitive, TaintPathSensitive }; protected: friend class BugReportEquivClass; @@ -367,14 +367,16 @@ public: PathSensitiveBugReport(const BugType &bt, StringRef desc, - const ExplodedNode *errorNode) - : PathSensitiveBugReport(bt, desc, desc, errorNode) {} + const ExplodedNode *errorNode, + BugReport::Kind kind = Kind::PathSensitive) + : PathSensitiveBugReport(bt, desc, desc, errorNode, kind) {} PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc, - const ExplodedNode *errorNode) + const ExplodedNode *errorNode, + BugReport::Kind kind = Kind::PathSensitive) : PathSensitiveBugReport(bt, shortDesc, desc, errorNode, /*LocationToUnique*/ {}, - /*DeclToUnique*/ nullptr) {} + /*DeclToUnique*/ nullptr, kind) {} /// Create a PathSensitiveBugReport with a custom uniqueing location. /// @@ -386,17 +388,20 @@ PathSensitiveBugReport(const BugType &bt, StringRef desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, - const Decl *DeclToUnique) + const Decl *DeclToUnique, + BugReport::Kind kind = Kind::PathSensitive) : PathSensitiveBugReport(bt, desc, desc, errorNode, LocationToUnique, - DeclToUnique) {} + DeclToUnique, kind) {} PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, - const Decl *DeclToUnique); + const Decl *DeclToUnique, + BugReport::Kind kind = Kind::PathSensitive); static bool classof(const BugReport *R) { - return R->getKind() == Kind::PathSensitive; + return R->getKind() >= Kind::PathSensitive && + R->getKind() <= Kind::TaintPathSensitive; } const ExplodedNode *getErrorNode() const { return ErrorNode; } Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h +++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h @@ -22,6 +22,7 @@ extern const char *const CXXMoveSemantics; extern const char *const SecurityError; extern const char *const UnusedCode; +extern const char *const TaintedData; } // namespace categories } // namespace ento } // namespace clang Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -32,12 +32,12 @@ namespace { class ArrayBoundCheckerV2 : public Checker { - mutable std::unique_ptr BT; + mutable std::unique_ptr BT; enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor = nullptr) const; + SVal &Offset) const; public: void checkLocation(SVal l, bool isLoad, const Stmt*S, @@ -166,7 +166,8 @@ // Are we constrained enough to definitely precede the lower bound? if (state_precedesLowerBound && !state_withinLowerBound) { - reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); + reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes, + rawOffsetVal); return; } @@ -208,14 +209,15 @@ SVal ByteOffset = rawOffset.getByteOffset(); if (isTainted(state, ByteOffset)) { reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, - std::make_unique(ByteOffset)); + rawOffsetVal); return; } } else if (state_exceedsUpperBound) { // If we are constrained enough to definitely exceed the upper bound, // report. assert(!state_withinUpperBound); - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes, + rawOffsetVal); return; } @@ -227,16 +229,22 @@ checkerContext.addTransition(state); } -void ArrayBoundCheckerV2::reportOOB( - CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor) const { +void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, + ProgramStateRef errorState, OOB_Kind kind, + SVal &Offset) const { ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); if (!errorNode) return; - if (!BT) - BT.reset(new BuiltinBug(this, "Out-of-bound access")); + if (!BT) { + if (kind != OOB_Tainted) + BT.reset( + new BugType(this, "Out-of-bound access", categories::LogicError)); + else + BT.reset( + new BugType(this, "Out-of-bound access", categories::TaintedData)); + } // FIXME: This diagnostics are preliminary. We should get far better // diagnostics for explaining buffer overruns. @@ -255,9 +263,13 @@ os << "(index is tainted)"; break; } + std::optional TD = std::nullopt; + if (kind == OOB_Tainted) + TD = getTaintDataPointeeOrPointer(checkerContext, Offset); - auto BR = std::make_unique(*BT, os.str(), errorNode); - BR->addVisitor(std::move(Visitor)); + auto BR = + TD ? std::make_unique(*BT, os.str(), errorNode, *TD) + : std::make_unique(*BT, os.str(), errorNode); checkerContext.emitReport(std::move(BR)); } Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -25,8 +25,9 @@ namespace { class DivZeroChecker : public Checker< check::PreStmt > { - mutable std::unique_ptr BT; - void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C, + mutable std::unique_ptr BT; + void reportBug(const char *Msg, std::string Category, + ProgramStateRef StateZero, CheckerContext &C, std::unique_ptr Visitor = nullptr) const; public: @@ -42,13 +43,19 @@ } void DivZeroChecker::reportBug( - const char *Msg, ProgramStateRef StateZero, CheckerContext &C, - std::unique_ptr Visitor) const { + const char *Msg, std::string Category, ProgramStateRef StateZero, + CheckerContext &C, std::unique_ptr Visitor) const { if (ExplodedNode *N = C.generateErrorNode(StateZero)) { if (!BT) - BT.reset(new BuiltinBug(this, "Division by zero")); + BT.reset(new BugType(this, "Division by zero", Category)); - auto R = std::make_unique(*BT, Msg, N); + std::optional TD = std::nullopt; + if (Category == categories::TaintedData) + TD = getTaintDataPointeeOrPointer(C, C.getSVal(getDenomExpr(N))); + + auto R = TD ? std::make_unique(*BT, Msg, N, *TD) + : std::make_unique(*BT, Msg, N); + std::make_unique(*BT, Msg, N); R->addVisitor(std::move(Visitor)); bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); C.emitReport(std::move(R)); @@ -82,14 +89,14 @@ if (!stateNotZero) { assert(stateZero); - reportBug("Division by zero", stateZero, C); + reportBug("Division by zero", categories::LogicError, stateZero, C); return; } bool TaintedD = isTainted(C.getState(), *DV); if ((stateNotZero && stateZero && TaintedD)) { - reportBug("Division by a tainted value, possibly zero", stateZero, C, - std::make_unique(*DV)); + reportBug("Division by a tainted value, possibly zero", + categories::TaintedData, stateZero, C); return; } Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -25,7 +25,9 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + #include "llvm/Support/YAMLTraits.h" #include @@ -71,6 +73,25 @@ using ArgIdxTy = int; using ArgVecTy = llvm::SmallVector; +/// In this structure we follow the spread of the taint flows through +/// the function arguments. +struct ArgIdxFlowTy { + ArgIdxTy ArgIdx; + int FlowId; + ArgIdxFlowTy(ArgIdxTy i) : ArgIdx(i), FlowId(UninitializedFlow){}; + ArgIdxFlowTy(ArgIdxTy i, int f) : ArgIdx(i), FlowId(f){}; + bool operator==(const ArgIdxFlowTy &X) const { + return FlowId == X.FlowId && ArgIdx == X.ArgIdx; + } + int operator<(const ArgIdxFlowTy &X) const { + return FlowId < X.FlowId && ArgIdx < X.ArgIdx; + } + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(FlowId); + ID.AddInteger(ArgIdx); + } +}; + /// Denotes the return value. constexpr ArgIdxTy ReturnValueIndex{-1}; @@ -81,56 +102,6 @@ return Count; } -/// Check if the region the expression evaluates to is the standard input, -/// and thus, is tainted. -/// FIXME: Move this to Taint.cpp. -bool isStdin(SVal Val, const ASTContext &ACtx) { - // FIXME: What if Val is NonParamVarRegion? - - // The region should be symbolic, we do not know it's value. - const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); - if (!SymReg) - return false; - - // Get it's symbol and find the declaration region it's pointing to. - const auto *DeclReg = - dyn_cast_or_null(SymReg->getSymbol()->getOriginRegion()); - if (!DeclReg) - return false; - - // This region corresponds to a declaration, find out if it's a global/extern - // variable named stdin with the proper type. - if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { - D = D->getCanonicalDecl(); - // FIXME: This should look for an exact match. - if (D->getName().contains("stdin") && D->isExternC()) { - const QualType FILETy = ACtx.getFILEType().getCanonicalType(); - const QualType Ty = D->getType().getCanonicalType(); - - if (Ty->isPointerType()) - return Ty->getPointeeType() == FILETy; - } - } - return false; -} - -SVal getPointeeOf(const CheckerContext &C, Loc LValue) { - const QualType ArgTy = LValue.getType(C.getASTContext()); - if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) - return C.getState()->getSVal(LValue); - - // Do not dereference void pointers. Treat them as byte pointers instead. - // FIXME: we might want to consider more than just the first byte. - return C.getState()->getSVal(LValue, C.getASTContext().CharTy); -} - -/// Given a pointer/reference argument, return the value it refers to. -std::optional getPointeeOf(const CheckerContext &C, SVal Arg) { - if (auto LValue = Arg.getAs()) - return getPointeeOf(C, *LValue); - return std::nullopt; -} - /// Given a pointer, return the SVal of its pointee or if it is tainted, /// otherwise return the pointer's SVal if tainted. /// Also considers stdin as a taint source. @@ -182,6 +153,25 @@ std::optional VariadicIndex; }; +const NoteTag *createTaintOriginTag(CheckerContext &C, int FlowId) { + LLVM_DEBUG(llvm::dbgs() << "New taint flow. Flow id: " << FlowId << '\n';); + const NoteTag *InjectionTag = + C.getNoteTag([&C, FlowId](PathSensitiveBugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + // Only make this note when the report is taint related and the flow + // id of the origin matches with the flow id of the sink + if (TaintBugReport *TBR = dyn_cast_or_null(&BR)) + if (TBR->taint_data.Flow == FlowId) { + LLVM_DEBUG(llvm::dbgs() << "Taint originated here. Flow id: " + << std::to_string(FlowId) << "\n"); + Out << "Taint originated here"; + } + return std::string(Out.str()); + }); + return InjectionTag; +} + /// A struct used to specify taint propagation rules for a function. /// /// If any of the possible taint source arguments is tainted, all of the @@ -193,7 +183,7 @@ ArgSet SinkArgs; /// Arguments which should be sanitized on function return. ArgSet FilterArgs; - /// Arguments which can participate in taint propagationa. If any of the + /// Arguments which can participate in taint propagation. If any of the /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should /// be tainted. ArgSet PropSrcArgs; @@ -334,7 +324,6 @@ public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; - void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const override; @@ -342,8 +331,13 @@ bool generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const; + static TaintFlowType generateNewTaintFlow() { return FlowIdCount++; } + private: - const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; + /// A unique counter for each taint flow. + /// We must increase this whenever the analysis hits a new taint source. + static TaintFlowType FlowIdCount; + const BugType BT{this, "Use of Untrusted Data", categories::TaintedData}; bool checkUncontrolledFormatString(const CallEvent &Call, CheckerContext &C) const; @@ -365,6 +359,8 @@ mutable std::optional StaticTaintRules; mutable std::optional DynamicTaintRules; }; +TaintFlowType GenericTaintChecker::FlowIdCount = 0; + } // end of anonymous namespace /// YAML serialization mapping. @@ -424,8 +420,8 @@ /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, - ImmutableSet) -REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) + ImmutableSet) +REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxFlowTy) void GenericTaintRuleParser::validateArgVector(const std::string &Option, const ArgVecTy &Args) const { @@ -776,29 +772,32 @@ // stored in the state as TaintArgsOnPostVisit set. TaintArgsOnPostVisitTy TaintArgsMap = State->get(); - const ImmutableSet *TaintArgs = TaintArgsMap.lookup(CurrentFrame); + const ImmutableSet *TaintArgs = + TaintArgsMap.lookup(CurrentFrame); if (!TaintArgs) return; assert(!TaintArgs->isEmpty()); - LLVM_DEBUG(for (ArgIdxTy I + LLVM_DEBUG(for (ArgIdxFlowTy I : *TaintArgs) { llvm::dbgs() << "PostCall<"; Call.dump(llvm::dbgs()); - llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; + llvm::dbgs() << "> actually wants to taint arg index: " << I.ArgIdx + << " Flow Id: " << I.FlowId << "\n"; }); - for (ArgIdxTy ArgNum : *TaintArgs) { + for (ArgIdxFlowTy ArgNum : *TaintArgs) { // Special handling for the tainted return value. - if (ArgNum == ReturnValueIndex) { - State = addTaint(State, Call.getReturnValue()); + if (ArgNum.ArgIdx == ReturnValueIndex) { + State = addTaint(State, Call.getReturnValue(), ArgNum.FlowId); continue; } // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) - State = addTaint(State, *V); + if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum.ArgIdx))) { + State = addTaint(State, *V, ArgNum.FlowId); + } } // Clear up the taint info from the state. @@ -826,8 +825,13 @@ /// Check for taint sinks. ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { - if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) + if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) { + LLVM_DEBUG(llvm::dbgs() + << "Potential Injection vulnerability detected. Flow id: " + << getTaintDataPointeeOrPointer(C, C.getSVal(E))->Flow + << '\n';); Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); + } }); /// Check for taint filters. @@ -843,11 +847,31 @@ /// A rule is relevant if PropSrcArgs is empty, or if any of its signified /// args are tainted in context of the current CallEvent. bool IsMatching = PropSrcArgs.isEmpty(); - ForEachCallArg( - [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { - IsMatching = IsMatching || (PropSrcArgs.contains(I) && - isTaintedOrPointsToTainted(E, State, C)); - }); + TaintFlowType TaintFlowId = UninitializedFlow; + + const NoteTag *InjectionTag = nullptr; + if (PropSrcArgs.isEmpty() && !PropDstArgs.isEmpty()) { + // The attacker injects the taint value here + // We place here a noteTag and assign a new Flow id. + TaintFlowId = GenericTaintChecker::generateNewTaintFlow(); + InjectionTag = createTaintOriginTag(C, TaintFlowId); + } + + ForEachCallArg([this, &C, &IsMatching, &State, &TaintFlowId, + &InjectionTag](ArgIdxTy I, const Expr *E, SVal) mutable { + IsMatching = IsMatching || (PropSrcArgs.contains(I) && + isTaintedOrPointsToTainted(E, State, C)); + if (TaintFlowId == UninitializedFlow) { + if (isStdin(C.getSVal(E), C.getASTContext())) { + // Reading from stdin. Creating a new flow + TaintFlowId = GenericTaintChecker::generateNewTaintFlow(); + InjectionTag = createTaintOriginTag(C, TaintFlowId); + } else if (getTaintDataPointeeOrPointer(C, C.getSVal(E)).has_value()) { + // Propagating an existing Flow Id + TaintFlowId = getTaintDataPointeeOrPointer(C, C.getSVal(E))->Flow; + } + } + }); if (!IsMatching) return; @@ -865,32 +889,34 @@ /// Propagate taint where it is necessary. auto &F = State->getStateManager().get_context(); - ImmutableSet Result = F.getEmptySet(); - ForEachCallArg( - [&](ArgIdxTy I, const Expr *E, SVal V) { - if (PropDstArgs.contains(I)) { - LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); - llvm::dbgs() - << "> prepares tainting arg index: " << I << '\n';); - Result = F.add(Result, I); - } - - // TODO: We should traverse all reachable memory regions via the - // escaping parameter. Instead of doing that we simply mark only the - // referred memory region as tainted. - if (WouldEscape(V, E->getType())) { - LLVM_DEBUG(if (!Result.contains(I)) { - llvm::dbgs() << "PreCall<"; - Call.dump(llvm::dbgs()); - llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; - }); - Result = F.add(Result, I); - } + ImmutableSet Result = F.getEmptySet(); + ForEachCallArg([&](ArgIdxTy I, const Expr *E, SVal V) { + if (PropDstArgs.contains(I)) { + LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); + llvm::dbgs() << "> prepares tainting arg index: " << I + << " Flow Id: " << TaintFlowId << '\n';); + ArgIdxFlowTy Flow(I, TaintFlowId); + Result = F.add(Result, Flow); + } + + // TODO: We should traverse all reachable memory regions via the + // escaping parameter. Instead of doing that we simply mark only the + // referred memory region as tainted. + if (WouldEscape(V, E->getType())) { + LLVM_DEBUG(if (!Result.contains(I)) { + llvm::dbgs() << "PreCall<"; + Call.dump(llvm::dbgs()); + llvm::dbgs() << "> prepares tainting arg index: " << I + << " Flow Id: " << TaintFlowId << '\n'; }); + ArgIdxFlowTy Flow(I, TaintFlowId); + Result = F.add(Result, Flow); + } + }); if (!Result.isEmpty()) State = State->set(C.getStackFrame(), Result); - C.addTransition(State); + C.addTransition(State, InjectionTag); } bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { @@ -902,16 +928,14 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); - std::optional TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; + std::optional TD = getTaintDataPointeeOrPointer(C, C.getSVal(E)); - if (!TaintedSVal) + if (!TD) { return false; - + } // Generate diagnostic. if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - auto report = std::make_unique(BT, Msg, N); - report->addRange(E->getSourceRange()); - report->addVisitor(std::make_unique(*TaintedSVal)); + auto report = std::make_unique(BT, Msg, N, *TD); C.emitReport(std::move(report)); return true; } @@ -982,7 +1006,8 @@ ProgramStateRef State = C.getState(); auto &F = State->getStateManager().get_context(); - ImmutableSet Result = F.add(F.getEmptySet(), ReturnValueIndex); + ImmutableSet Result = + F.add(F.getEmptySet(), ArgIdxFlowTy(ReturnValueIndex)); State = State->set(C.getStackFrame(), Result); C.addTransition(State); } Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -13,18 +13,17 @@ #include "clang/StaticAnalyzer/Checkers/Taint.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include using namespace clang; using namespace ento; using namespace taint; // Fully tainted symbols. -REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) +REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintData) // Partially tainted symbols. REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, - TaintTagType) + TaintData) REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, @@ -43,16 +42,14 @@ } ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, - const LocationContext *LCtx, - TaintTagType Kind) { - return addTaint(State, State->getSVal(S, LCtx), Kind); + const LocationContext *LCtx, TaintData Data) { + return addTaint(State, State->getSVal(S, LCtx), Data); } -ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, - TaintTagType Kind) { +ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, TaintData Data) { SymbolRef Sym = V.getAsSymbol(); if (Sym) - return addTaint(State, Sym, Kind); + return addTaint(State, Sym, Data); // If the SVal represents a structure, try to mass-taint all values within the // structure. For now it only works efficiently on lazy compound values that @@ -68,29 +65,29 @@ State->getStateManager().getStoreManager().getDefaultBinding( *LCV)) { if (SymbolRef Sym = binding->getAsSymbol()) - return addPartialTaint(State, Sym, LCV->getRegion(), Kind); + return addPartialTaint(State, Sym, LCV->getRegion(), Data); } } const MemRegion *R = V.getAsRegion(); - return addTaint(State, R, Kind); + return addTaint(State, R, Data); } ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, - TaintTagType Kind) { + TaintData Data) { if (const SymbolicRegion *SR = dyn_cast_or_null(R)) - return addTaint(State, SR->getSymbol(), Kind); + return addTaint(State, SR->getSymbol(), Data); return State; } ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, - TaintTagType Kind) { + TaintData Data) { // If this is a symbol cast, remove the cast before adding the taint. Taint // is cast agnostic. while (const SymbolCast *SC = dyn_cast(Sym)) Sym = SC->getOperand(); - ProgramStateRef NewState = State->set(Sym, Kind); + ProgramStateRef NewState = State->set(Sym, Data); assert(NewState); return NewState; } @@ -124,63 +121,43 @@ ProgramStateRef taint::addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, - TaintTagType Kind) { + TaintData Data) { // Ignore partial taint if the entire parent symbol is already tainted. - if (const TaintTagType *T = State->get(ParentSym)) - if (*T == Kind) + if (const TaintData *T = State->get(ParentSym)) + if (T->Type == Data.Type) return State; // Partial taint applies if only a portion of the symbol is tainted. if (SubRegion == SubRegion->getBaseRegion()) - return addTaint(State, ParentSym, Kind); + return addTaint(State, ParentSym, Data); const TaintedSubRegions *SavedRegs = State->get(ParentSym); TaintedSubRegions::Factory &F = State->get_context(); TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); - Regs = F.add(Regs, SubRegion, Kind); + Regs = F.add(Regs, SubRegion, Data); ProgramStateRef NewState = State->set(ParentSym, Regs); assert(NewState); return NewState; } -bool taint::isTainted(ProgramStateRef State, const Stmt *S, - const LocationContext *LCtx, TaintTagType Kind) { +std::optional taint::getTaintData(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx) { SVal val = State->getSVal(S, LCtx); - return isTainted(State, val, Kind); + return getTaintData(State, val); } -bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { +std::optional taint::getTaintData(ProgramStateRef State, SVal V) { if (SymbolRef Sym = V.getAsSymbol()) - return isTainted(State, Sym, Kind); + return getTaintData(State, Sym); if (const MemRegion *Reg = V.getAsRegion()) - return isTainted(State, Reg, Kind); - return false; -} - -bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, - TaintTagType K) { - if (!Reg) - return false; - - // Element region (array element) is tainted if either the base or the offset - // are tainted. - if (const ElementRegion *ER = dyn_cast(Reg)) - return isTainted(State, ER->getSuperRegion(), K) || - isTainted(State, ER->getIndex(), K); - - if (const SymbolicRegion *SR = dyn_cast(Reg)) - return isTainted(State, SR->getSymbol(), K); - - if (const SubRegion *ER = dyn_cast(Reg)) - return isTainted(State, ER->getSuperRegion(), K); - - return false; + return getTaintData(State, Reg); + return std::nullopt; } -bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { +std::optional taint::getTaintData(ProgramStateRef State, SymbolRef Sym) { if (!Sym) - return false; + return std::nullopt; // Traverse all the symbols this symbol depends on to see if any are tainted. for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), @@ -189,15 +166,14 @@ if (!isa(*SI)) continue; - if (const TaintTagType *Tag = State->get(*SI)) { - if (*Tag == Kind) - return true; + if (const TaintData *Data = State->get(*SI)) { + return *Data; } if (const auto *SD = dyn_cast(*SI)) { // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (isTainted(State, SD->getParentSymbol(), Kind)) - return true; + if (getTaintData(State, SD->getParentSymbol())) + return getTaintData(State, SD->getParentSymbol()); // If this is a SymbolDerived with the same parent symbol as another // tainted SymbolDerived and a region that's a sub-region of that tainted @@ -210,46 +186,142 @@ // complete. For example, this would not currently identify // overlapping fields in a union as tainted. To identify this we can // check for overlapping/nested byte offsets. - if (Kind == I.second && R->isSubRegionOf(I.first)) - return true; + if (R->isSubRegionOf(I.first)) + return I.second; } } } // If memory region is tainted, data is also tainted. if (const auto *SRV = dyn_cast(*SI)) { - if (isTainted(State, SRV->getRegion(), Kind)) - return true; + if (getTaintData(State, SRV->getRegion())) + return getTaintData(State, SRV->getRegion()); } // If this is a SymbolCast from a tainted value, it's also tainted. if (const auto *SC = dyn_cast(*SI)) { - if (isTainted(State, SC->getOperand(), Kind)) - return true; + if (getTaintData(State, SC->getOperand())) + return getTaintData(State, SC->getOperand()); } } + return std::nullopt; +} + +std::optional taint::getTaintData(ProgramStateRef State, + const MemRegion *Reg) { + if (!Reg) + return std::nullopt; + + // Element region (array element) is tainted if either the base or the offset + // are tainted. + if (const ElementRegion *ER = dyn_cast(Reg)) { + if (getTaintData(State, ER->getSuperRegion())) + return getTaintData(State, ER->getSuperRegion()); + else + return getTaintData(State, ER->getIndex()); + } + + if (const SymbolicRegion *SR = dyn_cast(Reg)) + return getTaintData(State, SR->getSymbol()); + + if (const SubRegion *ER = dyn_cast(Reg)) + return getTaintData(State, ER->getSuperRegion()); + + return std::nullopt; +} +bool taint::isStdin(SVal Val, const ASTContext &ACtx) { + // FIXME: What if Val is NonParamVarRegion? + + // The region should be symbolic, we do not know it's value. + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); + if (!SymReg) + return false; + + // Get it's symbol and find the declaration region it's pointing to. + const auto *DeclReg = + dyn_cast_or_null(SymReg->getSymbol()->getOriginRegion()); + if (!DeclReg) + return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { + D = D->getCanonicalDecl(); + // FIXME: This should look for an exact match. + if (D->getName().contains("stdin") && D->isExternC()) { + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) + return Ty->getPointeeType() == FILETy; + } + } return false; } -PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) { +std::optional taint::getTaintDataPointeeOrPointer(const CheckerContext &C, + SVal Arg) { + const ProgramStateRef State = C.getState(); + + if (auto Pointee = getPointeeOf(C, Arg)) + if (isTainted(State, *Pointee)) + return getTaintData(State, *Pointee); + + if (isTainted(State, Arg)) + return getTaintData(State, Arg); + + return std::nullopt; +} + +std::optional +taint::getTaintDataPointeeOrPointer(const Expr *E, const ProgramStateRef &State, + CheckerContext &C) { + return getTaintDataPointeeOrPointer(C, C.getSVal(E)); +} - // Find the ExplodedNode where the taint was first introduced - if (!isTainted(N->getState(), V) || - isTainted(N->getFirstPred()->getState(), V)) - return nullptr; +SVal taint::getPointeeOf(const CheckerContext &C, Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) + return C.getState()->getSVal(LValue); - const Stmt *S = N->getStmtForDiagnostics(); - if (!S) - return nullptr; + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} + +/// Given a pointer/reference argument, return the value it refers to. +std::optional taint::getPointeeOf(const CheckerContext &C, SVal Arg) { + if (auto LValue = Arg.getAs()) + return getPointeeOf(C, *LValue); + return std::nullopt; +} + +bool taint::isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, TaintTagType Kind) { + SVal val = State->getSVal(S, LCtx); + return isTainted(State, val, Kind); +} - const LocationContext *NCtx = N->getLocationContext(); - PathDiagnosticLocation L = - PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; +bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { + if (SymbolRef Sym = V.getAsSymbol()) + return isTainted(State, Sym, Kind); + if (const MemRegion *Reg = V.getAsRegion()) + return isTainted(State, Reg, Kind); + return false; +} - return std::make_shared(L, "Taint originated here"); +bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType Kind) { + if (!Reg) + return false; + std::optional TD = getTaintData(State, Reg); + return TD ? (TD->Type == Kind) : false; } + +bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { + if (!Sym) + return false; + std::optional TD = getTaintData(State, Sym); + return TD ? (TD->Type == Kind) : false; +} \ No newline at end of file Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -167,8 +167,7 @@ // Check if the size is tainted. if (isTainted(State, SizeV)) { - reportBug(VLA_Tainted, SizeE, nullptr, C, - std::make_unique(SizeV)); + reportBug(VLA_Tainted, SizeE, nullptr, C); return nullptr; } @@ -217,9 +216,16 @@ if (!N) return; - if (!BT) - BT.reset(new BuiltinBug( - this, "Dangerous variable-length array (VLA) declaration")); + if (!BT) { + if (Kind == VLA_Tainted) + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::TaintedData)); + else + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::LogicError)); + } SmallString<256> buf; llvm::raw_svector_ostream os(buf); @@ -242,7 +248,12 @@ break; } - auto report = std::make_unique(*BT, os.str(), N); + std::optional TD = std::nullopt; + if (Kind == VLA_Tainted) + TD = getTaintDataPointeeOrPointer(C, C.getSVal(SizeE)); + + auto report = TD ? std::make_unique(*BT, os.str(), N, *TD) + : std::make_unique(*BT, os.str(), N); report->addVisitor(std::move(Visitor)); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); Index: clang/lib/StaticAnalyzer/Core/BugReporter.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2130,8 +2130,8 @@ PathSensitiveBugReport::PathSensitiveBugReport( const BugType &bt, StringRef shortDesc, StringRef desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, - const Decl *DeclToUnique) - : BugReport(Kind::PathSensitive, bt, shortDesc, desc), ErrorNode(errorNode), + const Decl *DeclToUnique, BugReport::Kind kind) + : BugReport(kind, bt, shortDesc, desc), ErrorNode(errorNode), ErrorNodeRange(getStmt() ? getStmt()->getSourceRange() : SourceRange()), UniqueingLocation(LocationToUnique), UniqueingDecl(DeclToUnique) { assert(!isDependency(ErrorNode->getState() Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -23,6 +23,7 @@ const char *const CXXMoveSemantics = "C++ move semantics"; const char *const SecurityError = "Security error"; const char *const UnusedCode = "Unused code"; +const char *const TaintedData = "Tainted data used"; } // namespace categories } // namespace ento } // namespace clang Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif =================================================================== --- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif +++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif @@ -4,7 +4,7 @@ { "artifacts": [ { - "length": 434, + "length": 435, "location": { "index": 0, }, Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif =================================================================== --- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif +++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif @@ -44,7 +44,7 @@ { "importance": "essential", "location": { - "message": { + "message": { "text": "tainted" }, "physicalLocation": { Index: clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c =================================================================== --- clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c +++ clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c @@ -5,7 +5,7 @@ void f(void) { char s[80]; - scanf("%s", s); + scanf("%s", s); int d = atoi(s); // expected-warning {{tainted}} } Index: clang/test/Analysis/taint-checker-callback-order-has-definition.c =================================================================== --- clang/test/Analysis/taint-checker-callback-order-has-definition.c +++ clang/test/Analysis/taint-checker-callback-order-has-definition.c @@ -18,18 +18,21 @@ void top(const char *fname, char *buf) { FILE *fp = fopen(fname, "r"); - // CHECK: PreCall prepares tainting arg index: -1 - // CHECK-NEXT: PostCall actually wants to taint arg index: -1 + // CHECK: New taint flow. Flow id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: -1 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: -1 Flow Id: 0 if (!fp) return; (void)fgets(buf, 42, fp); // Trigger taint propagation. - // CHECK-NEXT: PreCall prepares tainting arg index: -1 - // CHECK-NEXT: PreCall prepares tainting arg index: 0 - // CHECK-NEXT: PreCall prepares tainting arg index: 2 - // - // CHECK-NEXT: PostCall actually wants to taint arg index: -1 - // CHECK-NEXT: PostCall actually wants to taint arg index: 0 - // CHECK-NEXT: PostCall actually wants to taint arg index: 2 + // CHECK-NEXT: PreCall prepares tainting arg index: -1 Flow Id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: 0 Flow Id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: 0 Flow Id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: 2 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: -1 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: 0 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: 2 Flow Id: 0 } + + Index: clang/test/Analysis/taint-checker-callback-order-without-definition.c =================================================================== --- clang/test/Analysis/taint-checker-callback-order-without-definition.c +++ clang/test/Analysis/taint-checker-callback-order-without-definition.c @@ -13,19 +13,22 @@ void top(const char *fname, char *buf) { FILE *fp = fopen(fname, "r"); // Introduce taint. - // CHECK: PreCall prepares tainting arg index: -1 - // CHECK-NEXT: PostCall actually wants to taint arg index: -1 + // CHECK: New taint flow. Flow id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: -1 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: -1 Flow Id: 0 if (!fp) return; (void)fgets(buf, 42, fp); // Trigger taint propagation. - // CHECK-NEXT: PreCall prepares tainting arg index: -1 - // CHECK-NEXT: PreCall prepares tainting arg index: 0 - // CHECK-NEXT: PreCall prepares tainting arg index: 2 + // CHECK-NEXT: PreCall prepares tainting arg index: -1 Flow Id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: 0 Flow Id: 0 + // CHECK-NEXT: PreCall prepares tainting arg index: 0 Flow Id: 0 // - // CHECK-NEXT: PostCall actually wants to taint arg index: -1 - // CHECK-NEXT: PostCall actually wants to taint arg index: 0 - // CHECK-NEXT: PostCall actually wants to taint arg index: 2 + // CHECK-NEXT: PreCall prepares tainting arg index: 2 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: -1 Flow Id: 0 + // CHECK-NEXT: PostCall actually wants to taint arg index: 0 Flow Id: 0 + // PostCall actually wants to taint arg index: 2 Flow Id: 0 + } Index: clang/test/Analysis/taint-diagnostic-visitor.c =================================================================== --- clang/test/Analysis/taint-diagnostic-visitor.c +++ clang/test/Analysis/taint-diagnostic-visitor.c @@ -2,8 +2,17 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker +typedef unsigned long size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + int scanf(const char *restrict format, ...); int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +void *malloc(size_t size ); +char *fgets(char *str, int n, FILE *stream); +FILE *stdin; void taintDiagnostic(void) { @@ -34,3 +43,41 @@ int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}} // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}} } + +//Tests if the originated note is correctly placed even if the path is +//propagating through variables and expressions +char* taintDiagnosticPropagation(){ + char *pathbuf; + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + return pathbuf; + } + return 0; +} + + +//Taint origin should be marked correctly even if there are multiple taint +//sources in the function +char* taintDiagnosticPropagation2(){ + char *pathbuf; + char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + char *user_env=getenv("USER_ENV_VAR");//unrelated taint source + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + return pathbuf; + } + return 0; +} + +void testReadStdIn(){ + char buf[1024]; + fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}} + system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} + +} Index: clang/test/Analysis/taint-dumps.c =================================================================== --- clang/test/Analysis/taint-dumps.c +++ clang/test/Analysis/taint-dumps.c @@ -6,7 +6,7 @@ int getchar(void); // CHECK: Tainted symbols: -// CHECK-NEXT: conj_$2{{.*}} : 0 +// CHECK-NEXT: conj_$2{{.*}} : Type:0 Flow:0 int test_taint_dumps(void) { int x = getchar(); clang_analyzer_printState();