Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/Taint.h +++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h @@ -62,43 +62,27 @@ TaintTagType Kind = TaintTagGeneric); /// Check if the statement has a tainted value in the given state. -bool isTainted(ProgramStateRef State, const Stmt *S, - const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric); +SymbolRef isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); /// Check if the value is tainted in the given state. -bool isTainted(ProgramStateRef State, SVal V, - TaintTagType Kind = TaintTagGeneric); +SymbolRef isTainted(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); /// Check if the symbol is tainted in the given state. -bool isTainted(ProgramStateRef State, SymbolRef Sym, - TaintTagType Kind = TaintTagGeneric); +SymbolRef isTainted(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); /// Check if the pointer represented by the region is tainted in the given /// state. -bool isTainted(ProgramStateRef State, const MemRegion *Reg, - TaintTagType Kind = TaintTagGeneric); +SymbolRef isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType Kind = TaintTagGeneric); void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n", const char *sep = ""); LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State); - -/// The bug visitor prints a diagnostic message at the location where a given -/// variable was tainted. -class TaintBugVisitor final : public BugReporterVisitor { -private: - const SVal V; - -public: - TaintBugVisitor(const SVal V) : V(V) {} - void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } - - PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) override; -}; - } // namespace taint } // namespace ento } // namespace clang Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h +++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h @@ -22,6 +22,7 @@ extern const char *const CXXMoveSemantics; extern const char *const SecurityError; extern const char *const UnusedCode; +extern const char *const TaintedData; } // namespace categories } // namespace ento } // namespace clang Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -32,12 +32,12 @@ namespace { class ArrayBoundCheckerV2 : public Checker { - mutable std::unique_ptr BT; + mutable std::unique_ptr BT; enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor = nullptr) const; + SymbolRef TaintedSymbol = nullptr) const; public: void checkLocation(SVal l, bool isLoad, const Stmt*S, @@ -206,9 +206,8 @@ // If we are under constrained and the index variables are tainted, report. if (state_exceedsUpperBound && state_withinUpperBound) { SVal ByteOffset = rawOffset.getByteOffset(); - if (isTainted(state, ByteOffset)) { - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, - std::make_unique(ByteOffset)); + if (SymbolRef TSR = isTainted(state, ByteOffset)) { + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, TSR); return; } } else if (state_exceedsUpperBound) { @@ -227,16 +226,16 @@ checkerContext.addTransition(state); } -void ArrayBoundCheckerV2::reportOOB( - CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor) const { +void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, + ProgramStateRef errorState, OOB_Kind kind, + SymbolRef TaintedSymbol) const { ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); if (!errorNode) return; - - if (!BT) - BT.reset(new BuiltinBug(this, "Out-of-bound access")); + BT.reset(new BugType(this, "Out-of-bound access", + kind == OOB_Tainted ? categories::TaintedData + : categories::LogicError)); // FIXME: This diagnostics are preliminary. We should get far better // diagnostics for explaining buffer overruns. @@ -257,7 +256,9 @@ } auto BR = std::make_unique(*BT, os.str(), errorNode); - BR->addVisitor(std::move(Visitor)); + if (TaintedSymbol) + BR->markInteresting(TaintedSymbol); + checkerContext.emitReport(std::move(BR)); } Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -25,9 +25,9 @@ namespace { class DivZeroChecker : public Checker< check::PreStmt > { - mutable std::unique_ptr BT; - void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C, - std::unique_ptr Visitor = nullptr) const; + mutable std::unique_ptr BT; + void reportBug(StringRef Msg, StringRef Category, ProgramStateRef StateZero, + CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const; public: void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; @@ -41,16 +41,17 @@ return nullptr; } -void DivZeroChecker::reportBug( - const char *Msg, ProgramStateRef StateZero, CheckerContext &C, - std::unique_ptr Visitor) const { +void DivZeroChecker::reportBug(StringRef Msg, StringRef Category, + ProgramStateRef StateZero, CheckerContext &C, + SymbolRef TaintedSymbol) const { if (ExplodedNode *N = C.generateErrorNode(StateZero)) { if (!BT) - BT.reset(new BuiltinBug(this, "Division by zero")); + BT.reset(new BugType(this, "Division by zero", Category)); auto R = std::make_unique(*BT, Msg, N); - R->addVisitor(std::move(Visitor)); bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); + if (TaintedSymbol) + R->markInteresting(TaintedSymbol); C.emitReport(std::move(R)); } } @@ -82,15 +83,16 @@ if (!stateNotZero) { assert(stateZero); - reportBug("Division by zero", stateZero, C); + reportBug("Division by zero", categories::LogicError, stateZero, C); return; } - bool TaintedD = isTainted(C.getState(), *DV); - if ((stateNotZero && stateZero && TaintedD)) { - reportBug("Division by a tainted value, possibly zero", stateZero, C, - std::make_unique(*DV)); - return; + if ((stateNotZero && stateZero)) { + if (SymbolRef TaintedSym = isTainted(C.getState(), *DV)) { + reportBug("Division by a tainted value, possibly zero", + categories::TaintedData, stateZero, C, TaintedSym); + return; + } } // If we get here, then the denom should not be zero. We abandon the implicit Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #define DEBUG_TYPE "taint-checker" @@ -114,47 +115,111 @@ return false; } -SVal getPointeeOf(const CheckerContext &C, Loc LValue) { - const QualType ArgTy = LValue.getType(C.getASTContext()); +SVal getPointeeOf(ASTContext &ASTCtx, const ProgramStateRef State, Loc LValue) { + const QualType ArgTy = LValue.getType(ASTCtx); if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) - return C.getState()->getSVal(LValue); + return State->getSVal(LValue); // Do not dereference void pointers. Treat them as byte pointers instead. // FIXME: we might want to consider more than just the first byte. - return C.getState()->getSVal(LValue, C.getASTContext().CharTy); + return State->getSVal(LValue, ASTCtx.CharTy); } /// Given a pointer/reference argument, return the value it refers to. -std::optional getPointeeOf(const CheckerContext &C, SVal Arg) { +std::optional getPointeeOf(ASTContext &ASTCtx, + const ProgramStateRef State, SVal Arg) { if (auto LValue = Arg.getAs()) - return getPointeeOf(C, *LValue); + return getPointeeOf(ASTCtx, State, *LValue); return std::nullopt; } /// Given a pointer, return the SVal of its pointee or if it is tainted, /// otherwise return the pointer's SVal if tainted. /// Also considers stdin as a taint source. -std::optional getTaintedPointeeOrPointer(const CheckerContext &C, +std::optional getTaintedPointeeOrPointer(ASTContext &ASTCtx, + const ProgramStateRef State, SVal Arg) { - const ProgramStateRef State = C.getState(); - - if (auto Pointee = getPointeeOf(C, Arg)) + if (auto Pointee = getPointeeOf(ASTCtx, State, Arg)) if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; return Pointee; if (isTainted(State, Arg)) return Arg; + return std::nullopt; +} - // FIXME: This should be done by the isTainted() API. - if (isStdin(Arg, C.getASTContext())) - return Arg; +bool isTaintedOrPointsToTainted(ASTContext &ASTCtx, + const ProgramStateRef &State, SVal ExprSVal) { + return getTaintedPointeeOrPointer(ASTCtx, State, ExprSVal).has_value(); +} - return std::nullopt; +/// Helps in printing taint diagnostics. +/// Marks the incoming parameters of a function interesting (to be printed) +/// when the return value, or the outgoing parameters are tainted. +const NoteTag *taintOriginTrackerTag(CheckerContext &C, + std::vector TaintedSymbols, + std::vector TaintedArgs, + const LocationContext *CallLocation) { + assert(TaintedSymbols.size() == TaintedArgs.size()); + return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), + TaintedArgs = std::move(TaintedArgs), CallLocation]( + PathSensitiveBugReport &BR) -> std::string { + SmallString<256> Msg; + // We give diagnostics only for taint related reports + if (!BR.isInteresting(CallLocation) || + BR.getBugType().getCategory() != categories::TaintedData) { + return ""; + } + if (TaintedSymbols.empty()) { + return "Taint originated here"; + } + for (auto Sym : llvm::enumerate(TaintedSymbols)) { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated from argument " + << TaintedArgs.at(Sym.index()) + 1 << "\n"); + BR.markInteresting(Sym.value()); + } + return ""; + }); } -bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, - CheckerContext &C) { - return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value(); +/// Helps in printing taint diagnostics. +/// Marks the function interesting (to be printed) +/// when the return value, or the outgoing parameters are tainted. +const NoteTag *taintPropagationExplainerTag( + CheckerContext &C, std::vector TaintedSymbols, + std::vector TaintedArgs, const LocationContext *CallLocation) { + assert(TaintedSymbols.size() == TaintedArgs.size()); + return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), + TaintedArgs = std::move(TaintedArgs), CallLocation]( + PathSensitiveBugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + // We give diagnostics only for taint related reports + if (TaintedSymbols.empty() || + BR.getBugType().getCategory() != categories::TaintedData) { + return ""; + } + int nofTaintedArgs = 0; + for (auto Sym : llvm::enumerate(TaintedSymbols)) { + if (BR.isInteresting(Sym.value())) { + BR.markInteresting(CallLocation); + if (TaintedArgs.at(Sym.index()) != ReturnValueIndex) { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " + << TaintedArgs.at(Sym.index()) + 1 << "\n"); + if (nofTaintedArgs == 0) + Out << "Taint propagated to argument " + << TaintedArgs.at(Sym.index()) + 1; + else + Out << ", " << TaintedArgs.at(Sym.index()) + 1; + nofTaintedArgs++; + } else { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n"); + Out << "Taint propagated to the return value"; + } + } + } + return std::string(Out.str()); + }); } /// ArgSet is used to describe arguments relevant for taint detection or @@ -193,7 +258,7 @@ ArgSet SinkArgs; /// Arguments which should be sanitized on function return. ArgSet FilterArgs; - /// Arguments which can participate in taint propagationa. If any of the + /// Arguments which can participate in taint propagation. If any of the /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should /// be tainted. ArgSet PropSrcArgs; @@ -343,7 +408,7 @@ CheckerContext &C) const; private: - const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; + const BugType BT{this, "Use of Untrusted Data", categories::TaintedData}; bool checkUncontrolledFormatString(const CallEvent &Call, CheckerContext &C) const; @@ -351,7 +416,7 @@ void taintUnsafeSocketProtocol(const CallEvent &Call, CheckerContext &C) const; - /// Default taint rules are initilized with the help of a CheckerContext to + /// Default taint rules are initalized with the help of a CheckerContext to /// access the names of built-in functions like memcpy. void initTaintRules(CheckerContext &C) const; @@ -788,22 +853,42 @@ llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; }); + const NoteTag *InjectionTag = nullptr; + std::vector TaintedSymbols; + std::vector TaintedIndexes; for (ArgIdxTy ArgNum : *TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { State = addTaint(State, Call.getReturnValue()); + SymbolRef TaintedSym = isTainted(State, Call.getReturnValue()); + if (TaintedSym) { + TaintedSymbols.push_back(TaintedSym); + TaintedIndexes.push_back(ArgNum); + } continue; } - // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) + if (auto V = + getPointeeOf(C.getASTContext(), State, Call.getArgSVal(ArgNum))) { State = addTaint(State, *V); + // FIXME: The call argument may be a complex expression + // referring to multiple tainted variables. + // Now we generate notes and track back only one of them. + SymbolRef TaintedSym = isTainted(State, *V); + if (TaintedSym) { + TaintedSymbols.push_back(TaintedSym); + TaintedIndexes.push_back(ArgNum); + } + } } - + // Create a NoteTag callback, which prints to the user where the taintedness + // was propagated to. + InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes, + Call.getCalleeStackFrame(0)); // Clear up the taint info from the state. State = State->remove(CurrentFrame); - C.addTransition(State); + C.addTransition(State, InjectionTag); } void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, @@ -826,7 +911,12 @@ /// Check for taint sinks. ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { - if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) + // Add taintedness to stdin parameters + if (isStdin(C.getSVal(E), C.getASTContext())) { + State = addTaint(State, C.getSVal(E)); + } + if (SinkArgs.contains(I) && + isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E))) Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); }); @@ -834,7 +924,7 @@ ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { if (FilterArgs.contains(I)) { State = removeTaint(State, S); - if (auto P = getPointeeOf(C, S)) + if (auto P = getPointeeOf(C.getASTContext(), State, S)) State = removeTaint(State, *P); } }); @@ -843,11 +933,25 @@ /// A rule is relevant if PropSrcArgs is empty, or if any of its signified /// args are tainted in context of the current CallEvent. bool IsMatching = PropSrcArgs.isEmpty(); - ForEachCallArg( - [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { - IsMatching = IsMatching || (PropSrcArgs.contains(I) && - isTaintedOrPointsToTainted(E, State, C)); - }); + std::vector TaintedSymbols; + std::vector TaintedIndexes; + ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, + &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { + IsMatching = + IsMatching || + (PropSrcArgs.contains(I) && + isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E))); + std::optional TaintedSVal = + getTaintedPointeeOrPointer(C.getASTContext(), State, C.getSVal(E)); + + // We track back tainted arguments except for stdin + if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) { + if (SymbolRef SR = isTainted(State, *TaintedSVal)) { + TaintedSymbols.push_back(SR); + TaintedIndexes.push_back(I); + } + } + }); if (!IsMatching) return; @@ -890,7 +994,9 @@ if (!Result.isEmpty()) State = State->set(C.getStackFrame(), Result); - C.addTransition(State); + const NoteTag *InjectionTag = taintOriginTrackerTag( + C, TaintedSymbols, TaintedIndexes, Call.getCalleeStackFrame(0)); + C.addTransition(State, InjectionTag); } bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { @@ -902,7 +1008,8 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); - std::optional TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; + std::optional TaintedSVal{getTaintedPointeeOrPointer( + C.getASTContext(), C.getState(), C.getSVal(E))}; if (!TaintedSVal) return false; @@ -911,7 +1018,9 @@ if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto report = std::make_unique(BT, Msg, N); report->addRange(E->getSourceRange()); - report->addVisitor(std::make_unique(*TaintedSVal)); + if (SymbolRef TaintedSym = isTainted(C.getState(), *TaintedSVal)) + report->markInteresting(TaintedSym); + C.emitReport(std::move(report)); return true; } Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -144,30 +144,33 @@ return NewState; } -bool taint::isTainted(ProgramStateRef State, const Stmt *S, - const LocationContext *LCtx, TaintTagType Kind) { +SymbolRef taint::isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, TaintTagType Kind) { SVal val = State->getSVal(S, LCtx); return isTainted(State, val, Kind); } -bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { +SymbolRef taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { if (SymbolRef Sym = V.getAsSymbol()) return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); - return false; + return nullptr; } -bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, - TaintTagType K) { +SymbolRef taint::isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType K) { if (!Reg) - return false; + return nullptr; // Element region (array element) is tainted if either the base or the offset // are tainted. - if (const ElementRegion *ER = dyn_cast(Reg)) - return isTainted(State, ER->getSuperRegion(), K) || - isTainted(State, ER->getIndex(), K); + if (const ElementRegion *ER = dyn_cast(Reg)) { + if (SymbolRef TaintedSym = isTainted(State, ER->getIndex(), K)) + return TaintedSym; + if (SymbolRef TaintedSym = isTainted(State, ER->getSuperRegion(), K)) + return TaintedSym; + } if (const SymbolicRegion *SR = dyn_cast(Reg)) return isTainted(State, SR->getSymbol(), K); @@ -175,12 +178,13 @@ if (const SubRegion *ER = dyn_cast(Reg)) return isTainted(State, ER->getSuperRegion(), K); - return false; + return nullptr; } -bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { +SymbolRef taint::isTainted(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind) { if (!Sym) - return false; + return nullptr; // Traverse all the symbols this symbol depends on to see if any are tainted. for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), @@ -190,14 +194,15 @@ continue; if (const TaintTagType *Tag = State->get(*SI)) { - if (*Tag == Kind) - return true; + if (*Tag == Kind) { + return *SI; + } } if (const auto *SD = dyn_cast(*SI)) { // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (isTainted(State, SD->getParentSymbol(), Kind)) - return true; + if (SymbolRef TSR = isTainted(State, SD->getParentSymbol(), Kind)) + return TSR; // If this is a SymbolDerived with the same parent symbol as another // tainted SymbolDerived and a region that's a sub-region of that tainted @@ -210,46 +215,25 @@ // complete. For example, this would not currently identify // overlapping fields in a union as tainted. To identify this we can // check for overlapping/nested byte offsets. - if (Kind == I.second && R->isSubRegionOf(I.first)) - return true; + if (Kind == I.second && R->isSubRegionOf(I.first)) { + return SD->getParentSymbol(); + } } } } // If memory region is tainted, data is also tainted. if (const auto *SRV = dyn_cast(*SI)) { - if (isTainted(State, SRV->getRegion(), Kind)) - return true; + if (SymbolRef TaintedSym = isTainted(State, SRV->getRegion(), Kind)) + return TaintedSym; } // If this is a SymbolCast from a tainted value, it's also tainted. if (const auto *SC = dyn_cast(*SI)) { - if (isTainted(State, SC->getOperand(), Kind)) - return true; + if (SymbolRef TaintedSym = isTainted(State, SC->getOperand(), Kind)) + return TaintedSym; } } - return false; -} - -PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) { - - // Find the ExplodedNode where the taint was first introduced - if (!isTainted(N->getState(), V) || - isTainted(N->getFirstPred()->getState(), V)) - return nullptr; - - const Stmt *S = N->getStmtForDiagnostics(); - if (!S) - return nullptr; - - const LocationContext *NCtx = N->getLocationContext(); - PathDiagnosticLocation L = - PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; - - return std::make_shared(L, "Taint originated here"); + return nullptr; } Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -55,8 +55,7 @@ const Expr *SizeE) const; void reportBug(VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, - CheckerContext &C, - std::unique_ptr Visitor = nullptr) const; + CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const; public: void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; @@ -166,9 +165,8 @@ return nullptr; // Check if the size is tainted. - if (isTainted(State, SizeV)) { - reportBug(VLA_Tainted, SizeE, nullptr, C, - std::make_unique(SizeV)); + if (SymbolRef TSR = isTainted(State, SizeV)) { + reportBug(VLA_Tainted, SizeE, nullptr, C, TSR); return nullptr; } @@ -209,17 +207,24 @@ return State; } -void VLASizeChecker::reportBug( - VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, - CheckerContext &C, std::unique_ptr Visitor) const { +void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE, + ProgramStateRef State, CheckerContext &C, + SymbolRef TaintedSymbol) const { // Generate an error node. ExplodedNode *N = C.generateErrorNode(State); if (!N) return; - if (!BT) - BT.reset(new BuiltinBug( - this, "Dangerous variable-length array (VLA) declaration")); + if (!BT) { + if (Kind == VLA_Tainted) + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::TaintedData)); + else + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::LogicError)); + } SmallString<256> buf; llvm::raw_svector_ostream os(buf); @@ -243,9 +248,11 @@ } auto report = std::make_unique(*BT, os.str(), N); - report->addVisitor(std::move(Visitor)); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); + if (TaintedSymbol) + report->markInteresting(TaintedSymbol); + C.emitReport(std::move(report)); } Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -23,6 +23,7 @@ const char *const CXXMoveSemantics = "C++ move semantics"; const char *const SecurityError = "Security error"; const char *const UnusedCode = "Unused code"; +const char *const TaintedData = "Tainted data used"; } // namespace categories } // namespace ento } // namespace clang Index: clang/test/Analysis/taint-diagnostic-visitor.c =================================================================== --- clang/test/Analysis/taint-diagnostic-visitor.c +++ clang/test/Analysis/taint-diagnostic-visitor.c @@ -2,13 +2,24 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker +typedef unsigned long size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + int scanf(const char *restrict format, ...); int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +FILE *stdin; void taintDiagnostic(void) { char buf[128]; scanf("%s", buf); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 2}} system(buf); // expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} } @@ -16,6 +27,7 @@ int index; int Array[] = {1, 2, 3, 4, 5}; scanf("%d", &index); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 2}} return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}} // expected-note@-1 {{Out of bound memory access (index is tainted)}} } @@ -23,6 +35,7 @@ int taintDiagnosticDivZero(int operand) { scanf("%d", &operand); // expected-note {{Value assigned to 'operand'}} // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to argument 2}} return 10 / operand; // expected-warning {{Division by a tainted value, possibly zero}} // expected-note@-1 {{Division by a tainted value, possibly zero}} } @@ -31,6 +44,71 @@ int x; scanf("%d", &x); // expected-note {{Value assigned to 'x'}} // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to argument 2}} int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}} // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}} } + + +//Tests if the originated note is correctly placed even if the path is +//propagating through variables and expressions +char* taintDiagnosticPropagation(){ + char *pathbuf; + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + // expected-note@-2 {{Taint propagated to the return value}} + return pathbuf; + } + return 0; +} + +//Taint origin should be marked correctly even if there are multiple taint +//sources in the function +char* taintDiagnosticPropagation2(){ + char *pathbuf; + char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} + char *user_env=getenv("USER_ENV_VAR");//unrelated taint source + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + // expected-note@-2 {{Taint propagated to the return value}} + return pathbuf; + } + return 0; +} + +void testReadStdIn(){ + char buf[1024]; + fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 1}} + system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} + +} + +void multipleTaintSources(void) { + int x,y,z; + scanf("%d", &x); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 2}} + scanf("%d", &y); // FIXME: Would be nice to indicate taint originated and propagation here too + scanf("%d", &z); + int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + free (ptr); +} + +void multipleTaintedArgs(void) { + int x,y; + scanf("%d %d", &x, &y); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 3}} + // FIXME: Would be nice to indicate taint propagation argument 2 + int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + free (ptr); +} Index: clang/test/Analysis/taint-tester.c =================================================================== --- clang/test/Analysis/taint-tester.c +++ clang/test/Analysis/taint-tester.c @@ -122,7 +122,7 @@ fscanf(pp, "%d", &ii); int jj = ii;// expected-warning + {{tainted}} - fscanf(p, "%d", &ii); + fscanf(p, "%d", &ii);// expected-warning + {{tainted}} int jj2 = ii;// expected-warning + {{tainted}} ii = 3;