Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/Taint.h +++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h @@ -61,44 +61,29 @@ const SubRegion *SubRegion, TaintTagType Kind = TaintTagGeneric); + /// Check if the statement has a tainted value in the given state. -bool isTainted(ProgramStateRef State, const Stmt *S, +SymbolRef isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind = TaintTagGeneric); /// Check if the value is tainted in the given state. -bool isTainted(ProgramStateRef State, SVal V, +SymbolRef isTainted(ProgramStateRef State, SVal V, TaintTagType Kind = TaintTagGeneric); /// Check if the symbol is tainted in the given state. -bool isTainted(ProgramStateRef State, SymbolRef Sym, +SymbolRef isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind = TaintTagGeneric); /// Check if the pointer represented by the region is tainted in the given /// state. -bool isTainted(ProgramStateRef State, const MemRegion *Reg, +SymbolRef isTainted(ProgramStateRef State, const MemRegion *Reg, TaintTagType Kind = TaintTagGeneric); void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n", const char *sep = ""); LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State); - -/// The bug visitor prints a diagnostic message at the location where a given -/// variable was tainted. -class TaintBugVisitor final : public BugReporterVisitor { -private: - const SVal V; - -public: - TaintBugVisitor(const SVal V) : V(V) {} - void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } - - PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) override; -}; - } // namespace taint } // namespace ento } // namespace clang Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h +++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h @@ -22,6 +22,7 @@ extern const char *const CXXMoveSemantics; extern const char *const SecurityError; extern const char *const UnusedCode; +extern const char *const TaintedData; } // namespace categories } // namespace ento } // namespace clang Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -32,12 +32,12 @@ namespace { class ArrayBoundCheckerV2 : public Checker { - mutable std::unique_ptr BT; + mutable std::unique_ptr BT; enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor = nullptr) const; + SymbolRef TaintedSymbol = nullptr) const; public: void checkLocation(SVal l, bool isLoad, const Stmt*S, @@ -206,9 +206,8 @@ // If we are under constrained and the index variables are tainted, report. if (state_exceedsUpperBound && state_withinUpperBound) { SVal ByteOffset = rawOffset.getByteOffset(); - if (isTainted(state, ByteOffset)) { - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, - std::make_unique(ByteOffset)); + if (SymbolRef TSR = isTainted(state, ByteOffset)) { + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, TSR); return; } } else if (state_exceedsUpperBound) { @@ -229,14 +228,20 @@ void ArrayBoundCheckerV2::reportOOB( CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind, - std::unique_ptr Visitor) const { + SymbolRef TaintedSymbol) const { ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); if (!errorNode) return; - if (!BT) - BT.reset(new BuiltinBug(this, "Out-of-bound access")); + if (!BT) { + if (kind == OOB_Tainted) + BT.reset( + new BugType(this, "Out-of-bound access", categories::TaintedData)); + else + BT.reset( + new BugType(this, "Out-of-bound access", categories::LogicError)); + } // FIXME: This diagnostics are preliminary. We should get far better // diagnostics for explaining buffer overruns. @@ -257,7 +262,9 @@ } auto BR = std::make_unique(*BT, os.str(), errorNode); - BR->addVisitor(std::move(Visitor)); + if (TaintedSymbol) + BR->markInteresting(TaintedSymbol); + checkerContext.emitReport(std::move(BR)); } Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -25,9 +25,10 @@ namespace { class DivZeroChecker : public Checker< check::PreStmt > { - mutable std::unique_ptr BT; - void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C, - std::unique_ptr Visitor = nullptr) const; + mutable std::unique_ptr BT; + void reportBug(const char *Msg, std::string Category, + ProgramStateRef StateZero, CheckerContext &C, + SymbolRef TaintedSymbol = nullptr) const; public: void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; @@ -41,16 +42,17 @@ return nullptr; } -void DivZeroChecker::reportBug( - const char *Msg, ProgramStateRef StateZero, CheckerContext &C, - std::unique_ptr Visitor) const { +void DivZeroChecker::reportBug(const char *Msg, std::string Category, + ProgramStateRef StateZero, CheckerContext &C, + SymbolRef TaintedSymbol) const { if (ExplodedNode *N = C.generateErrorNode(StateZero)) { if (!BT) - BT.reset(new BuiltinBug(this, "Division by zero")); + BT.reset(new BugType(this, "Division by zero", Category)); auto R = std::make_unique(*BT, Msg, N); - R->addVisitor(std::move(Visitor)); bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); + if (TaintedSymbol) + R->markInteresting(TaintedSymbol); C.emitReport(std::move(R)); } } @@ -82,14 +84,14 @@ if (!stateNotZero) { assert(stateZero); - reportBug("Division by zero", stateZero, C); + reportBug("Division by zero", categories::LogicError, stateZero, C); return; } - bool TaintedD = isTainted(C.getState(), *DV); - if ((stateNotZero && stateZero && TaintedD)) { - reportBug("Division by a tainted value, possibly zero", stateZero, C, - std::make_unique(*DV)); + SymbolRef TSR = isTainted(C.getState(), *DV); + if ((stateNotZero && stateZero && TSR)) { + reportBug("Division by a tainted value, possibly zero", + categories::TaintedData, stateZero, C, TSR); return; } Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #define DEBUG_TYPE "taint-checker" @@ -135,26 +136,93 @@ /// otherwise return the pointer's SVal if tainted. /// Also considers stdin as a taint source. std::optional getTaintedPointeeOrPointer(const CheckerContext &C, + const ProgramStateRef State, SVal Arg) { - const ProgramStateRef State = C.getState(); - if (auto Pointee = getPointeeOf(C, Arg)) if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; return Pointee; if (isTainted(State, Arg)) return Arg; - - // FIXME: This should be done by the isTainted() API. - if (isStdin(Arg, C.getASTContext())) - return Arg; - return std::nullopt; } bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, CheckerContext &C) { - return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value(); + return getTaintedPointeeOrPointer(C, State, C.getSVal(E)).has_value(); +} + +/// Helps in printing taint diagnostics. +/// Marks the incoming parameters of a function interesting (to be printed) +/// when the return value, or the outgoing parameters are tainted. +const NoteTag *createTaintPreTag(CheckerContext &C, + std::vector TaintedSymbols, + std::vector TaintedArgs, + const CallEvent& Call) { + const LocationContext* LC = Call.getCalleeStackFrame(0); + + const NoteTag *InjectionTag = C.getNoteTag( + [TaintedSymbols, TaintedArgs,LC](PathSensitiveBugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + // We give diagnostics only for taint related reports + if (!BR.isInteresting(LC) || + BR.getBugType().getCategory() != categories::TaintedData) { + return ""; + } + if (TaintedSymbols.empty()){ + Out << "Taint originated here"; + return std::string(Out.str()); + } + int i = 0; + for (SymbolRef SR : TaintedSymbols) { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated from argument" + << TaintedArgs.at(i) << "\n"); + BR.markInteresting(SR); + i++; + } + return std::string(Out.str()); + }); + return InjectionTag; +} + +/// Helps in printing taint diagnostics. +/// Marks the function interesting (to be printed) +/// when the return value, or the outgoing parameters are tainted. +const NoteTag *createTaintPostTag(CheckerContext &C, + std::vector TaintedSymbols, + std::vector TaintedArgs, + const CallEvent& Call) { + const LocationContext* LC = Call.getCalleeStackFrame(0); + + const NoteTag *InjectionTag = C.getNoteTag( + [TaintedSymbols, TaintedArgs, LC](PathSensitiveBugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + // We give diagnostics only for taint related reports + if (TaintedSymbols.empty() || + BR.getBugType().getCategory() != categories::TaintedData) { + return ""; + } + int i = 0; + for (SymbolRef SR : TaintedSymbols) { + if (BR.isInteresting(SR)) { + BR.markInteresting(LC); + if (TaintedArgs.at(i) != ReturnValueIndex) { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " + << TaintedArgs.at(i) << "\n"); + Out << "Taint propagated to argument " << TaintedArgs.at(i); + } else { + LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n"); + Out << "Taint propagated to the return value"; + } + } + i++; + } + + return std::string(Out.str()); + }); + return InjectionTag; } /// ArgSet is used to describe arguments relevant for taint detection or @@ -193,7 +261,7 @@ ArgSet SinkArgs; /// Arguments which should be sanitized on function return. ArgSet FilterArgs; - /// Arguments which can participate in taint propagationa. If any of the + /// Arguments which can participate in taint propagation. If any of the /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should /// be tainted. ArgSet PropSrcArgs; @@ -343,7 +411,7 @@ CheckerContext &C) const; private: - const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; + const BugType BT{this, "Use of Untrusted Data", categories::TaintedData}; bool checkUncontrolledFormatString(const CallEvent &Call, CheckerContext &C) const; @@ -788,22 +856,44 @@ llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; }); + const NoteTag *InjectionTag = nullptr; + std::vector TaintedSymbols; + std::vector TaintedIndexes; for (ArgIdxTy ArgNum : *TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { State = addTaint(State, Call.getReturnValue()); + SymbolRef TSR = isTainted(State, Call.getReturnValue()); + if (TSR){ + TaintedSymbols.push_back(TSR); + TaintedIndexes.push_back(ArgNum); + }else{ + LLVM_DEBUG(llvm::dbgs() << "Strange. Return value not tainted.\n"); + LLVM_DEBUG(Call.getReturnValue().dumpToStream(llvm::dbgs())); + LLVM_DEBUG(llvm::dbgs() << "\n"); + } continue; } - // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. - if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) + if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))){ State = addTaint(State, *V); + SymbolRef TSR = isTainted(State, *V); + if (TSR) { + TaintedSymbols.push_back(TSR); + TaintedIndexes.push_back(ArgNum); + } else { + LLVM_DEBUG(llvm::dbgs() << "Strange. Arg Not Tainted:" << ArgNum << "\n"); + LLVM_DEBUG(V->dumpToStream(llvm::dbgs())); + LLVM_DEBUG(llvm::dbgs() << "\n"); + } + } } + InjectionTag = createTaintPostTag(C, TaintedSymbols, TaintedIndexes, Call); // Clear up the taint info from the state. State = State->remove(CurrentFrame); - C.addTransition(State); + C.addTransition(State,InjectionTag); } void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, @@ -826,6 +916,10 @@ /// Check for taint sinks. ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { + //Add taintedness to stdin parameters + if (isStdin(C.getSVal(E),C.getASTContext())){ + State = addTaint(State, C.getSVal(E)); + } if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); }); @@ -839,15 +933,31 @@ } }); + + /// Check for taint propagation sources. /// A rule is relevant if PropSrcArgs is empty, or if any of its signified /// args are tainted in context of the current CallEvent. bool IsMatching = PropSrcArgs.isEmpty(); - ForEachCallArg( - [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { - IsMatching = IsMatching || (PropSrcArgs.contains(I) && - isTaintedOrPointsToTainted(E, State, C)); - }); + const NoteTag *InjectionTag = nullptr; + + std::vector TaintedSymbols; + std::vector TaintedIndexes; + ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, + &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { + IsMatching = IsMatching || (PropSrcArgs.contains(I) && + isTaintedOrPointsToTainted(E, State, C)); + std::optional TaintedSVal = + getTaintedPointeeOrPointer(C, State, C.getSVal(E)); + + // We track back tainted arguments except for stdin + if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) { + if (SymbolRef SR = isTainted(State, *TaintedSVal)) { + TaintedSymbols.push_back(SR); + TaintedIndexes.push_back(I); + } + } + }); if (!IsMatching) return; @@ -890,7 +1000,8 @@ if (!Result.isEmpty()) State = State->set(C.getStackFrame(), Result); - C.addTransition(State); + InjectionTag = createTaintPreTag(C,TaintedSymbols,TaintedIndexes,Call); + C.addTransition(State, InjectionTag); } bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { @@ -902,7 +1013,7 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); - std::optional TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; + std::optional TaintedSVal{getTaintedPointeeOrPointer(C, C.getState(), C.getSVal(E))}; if (!TaintedSVal) return false; @@ -911,7 +1022,10 @@ if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto report = std::make_unique(BT, Msg, N); report->addRange(E->getSourceRange()); - report->addVisitor(std::make_unique(*TaintedSVal)); + SymbolRef TSR = isTainted(C.getState(),*TaintedSVal); + if (TSR) + report->markInteresting(TSR); + C.emitReport(std::move(report)); return true; } Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -144,30 +144,34 @@ return NewState; } -bool taint::isTainted(ProgramStateRef State, const Stmt *S, +SymbolRef taint::isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind) { SVal val = State->getSVal(S, LCtx); return isTainted(State, val, Kind); } -bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { +SymbolRef taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { if (SymbolRef Sym = V.getAsSymbol()) return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); - return false; + return nullptr; } -bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, +SymbolRef taint::isTainted(ProgramStateRef State, const MemRegion *Reg, TaintTagType K) { if (!Reg) - return false; + return nullptr; // Element region (array element) is tainted if either the base or the offset // are tainted. - if (const ElementRegion *ER = dyn_cast(Reg)) - return isTainted(State, ER->getSuperRegion(), K) || - isTainted(State, ER->getIndex(), K); + if (const ElementRegion *ER = dyn_cast(Reg)) { + SymbolRef TSR; + if (TSR = isTainted(State, ER->getSuperRegion(), K)) + return TSR; + else if (TSR = isTainted(State, ER->getIndex(), K)) + return TSR; + } if (const SymbolicRegion *SR = dyn_cast(Reg)) return isTainted(State, SR->getSymbol(), K); @@ -175,12 +179,12 @@ if (const SubRegion *ER = dyn_cast(Reg)) return isTainted(State, ER->getSuperRegion(), K); - return false; + return nullptr; } -bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { +SymbolRef taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { if (!Sym) - return false; + return nullptr; // Traverse all the symbols this symbol depends on to see if any are tainted. for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), @@ -190,14 +194,16 @@ continue; if (const TaintTagType *Tag = State->get(*SI)) { - if (*Tag == Kind) - return true; + if (*Tag == Kind){ + return *SI; + } } if (const auto *SD = dyn_cast(*SI)) { // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (isTainted(State, SD->getParentSymbol(), Kind)) - return true; + if (SymbolRef TSR = + isTainted(State, SD->getParentSymbol(), Kind)) + return TSR; // If this is a SymbolDerived with the same parent symbol as another // tainted SymbolDerived and a region that's a sub-region of that tainted @@ -210,46 +216,27 @@ // complete. For example, this would not currently identify // overlapping fields in a union as tainted. To identify this we can // check for overlapping/nested byte offsets. - if (Kind == I.second && R->isSubRegionOf(I.first)) - return true; + if (Kind == I.second && R->isSubRegionOf(I.first)){ + return SD->getParentSymbol(); + } } } } // If memory region is tainted, data is also tainted. if (const auto *SRV = dyn_cast(*SI)) { - if (isTainted(State, SRV->getRegion(), Kind)) - return true; + if (SymbolRef TSR = + isTainted(State, SRV->getRegion(), Kind)) + return TSR; } // If this is a SymbolCast from a tainted value, it's also tainted. if (const auto *SC = dyn_cast(*SI)) { - if (isTainted(State, SC->getOperand(), Kind)) - return true; + if (SymbolRef TSR = + isTainted(State, SC->getOperand(), Kind)) + return TSR; } } - return false; -} - -PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) { - - // Find the ExplodedNode where the taint was first introduced - if (!isTainted(N->getState(), V) || - isTainted(N->getFirstPred()->getState(), V)) - return nullptr; - - const Stmt *S = N->getStmtForDiagnostics(); - if (!S) - return nullptr; - - const LocationContext *NCtx = N->getLocationContext(); - PathDiagnosticLocation L = - PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; - - return std::make_shared(L, "Taint originated here"); + return nullptr; } Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -56,7 +56,7 @@ void reportBug(VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, CheckerContext &C, - std::unique_ptr Visitor = nullptr) const; + SymbolRef TaintedSymbol = nullptr) const; public: void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; @@ -166,9 +166,8 @@ return nullptr; // Check if the size is tainted. - if (isTainted(State, SizeV)) { - reportBug(VLA_Tainted, SizeE, nullptr, C, - std::make_unique(SizeV)); + if (SymbolRef TSR = isTainted(State, SizeV)) { + reportBug(VLA_Tainted, SizeE, nullptr, C, TSR); return nullptr; } @@ -209,17 +208,24 @@ return State; } -void VLASizeChecker::reportBug( - VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, - CheckerContext &C, std::unique_ptr Visitor) const { +void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE, + ProgramStateRef State, CheckerContext &C, + SymbolRef TaintedSymbol) const { // Generate an error node. ExplodedNode *N = C.generateErrorNode(State); if (!N) return; - if (!BT) - BT.reset(new BuiltinBug( - this, "Dangerous variable-length array (VLA) declaration")); + if (!BT) { + if (Kind == VLA_Tainted) + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::TaintedData)); + else + BT.reset(new BugType(this, + "Dangerous variable-length array (VLA) declaration", + categories::LogicError)); + } SmallString<256> buf; llvm::raw_svector_ostream os(buf); @@ -243,9 +249,11 @@ } auto report = std::make_unique(*BT, os.str(), N); - report->addVisitor(std::move(Visitor)); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); + if (TaintedSymbol) + report->markInteresting(TaintedSymbol); + C.emitReport(std::move(report)); } Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -23,6 +23,7 @@ const char *const CXXMoveSemantics = "C++ move semantics"; const char *const SecurityError = "Security error"; const char *const UnusedCode = "Unused code"; +const char *const TaintedData = "Tainted data used"; } // namespace categories } // namespace ento } // namespace clang Index: clang/test/Analysis/taint-diagnostic-visitor.c =================================================================== --- clang/test/Analysis/taint-diagnostic-visitor.c +++ clang/test/Analysis/taint-diagnostic-visitor.c @@ -2,13 +2,23 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker +typedef unsigned long size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + int scanf(const char *restrict format, ...); int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +void *malloc(size_t size ); +char *fgets(char *str, int n, FILE *stream); +FILE *stdin; void taintDiagnostic(void) { char buf[128]; scanf("%s", buf); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 1}} system(buf); // expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} } @@ -16,6 +26,7 @@ int index; int Array[] = {1, 2, 3, 4, 5}; scanf("%d", &index); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 1}} return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}} // expected-note@-1 {{Out of bound memory access (index is tainted)}} } @@ -23,6 +34,7 @@ int taintDiagnosticDivZero(int operand) { scanf("%d", &operand); // expected-note {{Value assigned to 'operand'}} // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to argument 1}} return 10 / operand; // expected-warning {{Division by a tainted value, possibly zero}} // expected-note@-1 {{Division by a tainted value, possibly zero}} } @@ -31,6 +43,50 @@ int x; scanf("%d", &x); // expected-note {{Value assigned to 'x'}} // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to argument 1}} int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}} // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}} } + + +//Tests if the originated note is correctly placed even if the path is +//propagating through variables and expressions +char* taintDiagnosticPropagation(){ + char *pathbuf; + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + // expected-note@-2 {{Taint propagated to the return value}} + return pathbuf; + } + return 0; +} + +//Taint origin should be marked correctly even if there are multiple taint +//sources in the function +char* taintDiagnosticPropagation2(){ + char *pathbuf; + char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source + char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} + char *user_env=getenv("USER_ENV_VAR");//unrelated taint source + if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + // expected-note@-1 {{Taking true branch}} + pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + // expected-note@-1{{Untrusted data is used to specify the buffer size}} + // expected-note@-2 {{Taint propagated to the return value}} + return pathbuf; + } + return 0; +} + +void testReadStdIn(){ + char buf[1024]; + fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to argument 0}} + system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}} + +} Index: clang/test/Analysis/taint-tester.c =================================================================== --- clang/test/Analysis/taint-tester.c +++ clang/test/Analysis/taint-tester.c @@ -122,7 +122,7 @@ fscanf(pp, "%d", &ii); int jj = ii;// expected-warning + {{tainted}} - fscanf(p, "%d", &ii); + fscanf(p, "%d", &ii);// expected-warning + {{tainted}} int jj2 = ii;// expected-warning + {{tainted}} ii = 3;