Diff 71199

lib/Sema/SemaChecking.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,833 Lines • ▼ Show 20 Lines

enum StringLiteralCheckType {		enum StringLiteralCheckType {
SLCT_NotALiteral,		SLCT_NotALiteral,
SLCT_UncheckedLiteral,		SLCT_UncheckedLiteral,
SLCT_CheckedLiteral		SLCT_CheckedLiteral
};		};
} // end anonymous namespace		} // end anonymous namespace

static void CheckFormatString(Sema &S, const StringLiteral *FExpr,		static void sumUpStringLiteralOffset(llvm::APSInt &Offset, llvm::APSInt Addend,
		BinaryOperatorKind BinOpKind,
		bool AddendIsRight) {
		srhinesUnsubmitted Done Reply Inline Actions Is "Operand" better than "Addend"? In particular, there is the possibility that we do subtraction of the value instead of addition, so "Addend" makes it a bit confusing. Of course, I then would expect "OperandIsRight" instead of "AddendIsRight" too. srhines: Is "Operand" better than "Addend"? In particular, there is the possibility that we do…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions Clang summarizes sub and add as "additive" operands. This is why I think this is fitting. Operand is misleading because it includes a lot more operands than add and sub imo. meikeb: Clang summarizes sub and add as "additive" operands. This is why I think this is fitting.
		// Folds Addend into Offset in place. BO_Add adds; BO_Sub subtracts and is
		// only accepted when the addend is the right-hand operand. Both values are
		// first brought to a common signed bit width; if the signed operation
		// still overflows, Offset is widened to twice that width and the operation
		// is retried.
		unsigned BitWidth = Offset.getBitWidth();
		unsigned AddendBitWidth = Addend.getBitWidth();
		// There might be negative interim results.
		// An unsigned addend gets one extra (zero) bit so that reinterpreting it as
		// signed keeps its value.
		if (Addend.isUnsigned()) {
		Addend = Addend.zext(++AddendBitWidth);
		Addend.setIsSigned(true);
		}
		// Adjust the bit width of the APSInts.
		srhinesUnsubmitted Done Reply Inline Actions Align -> Canonicalize or Adjust This is confusing here as "Align" already has too many overloaded meanings in programming (that could be relevant to bitwidths). Canonicalize or Adjust don't have this problem. srhines: Align -> Canonicalize or Adjust This is confusing here as "Align" already has too many…
		if (AddendBitWidth > BitWidth) {
		Offset = Offset.sext(AddendBitWidth);
		BitWidth = AddendBitWidth;
		} else if (BitWidth > AddendBitWidth) {
		Addend = Addend.sext(BitWidth);
		}

		bool Ov = false;
		srhinesUnsubmitted Done Reply Inline Actions Ov -> Overflow srhines: Ov -> Overflow
		meikebAuthorUnsubmitted Not Done Reply Inline Actions I named that in compliance with clang naming. E.g. sadd_ov. It is common in this file to abbreviate variable names with 1-3 characters. meikeb: I named that in compliance with clang naming. E.g. sadd_ov. It is common in this file to…
		llvm::APSInt ResOffset = Offset;
		// Subtraction is only handled as Offset - Addend, i.e. with the addend on
		// the right.
		if (BinOpKind == BO_Add)
		ResOffset = Offset.sadd_ov(Addend, Ov);
		else if (AddendIsRight && BinOpKind == BO_Sub)
		ResOffset = Offset.ssub_ov(Addend, Ov);
		srhinesUnsubmitted Done Reply Inline Actions What happens if someone passes something that isn't caught by these two cases? Should we be returning an indicator that the calculation failed? If not, should we assert here? srhines: What happens if someone passes something that isn't caught by these two cases? Should we be…
		else
		assert(false && "operator must be add or sub with addend on the right");
		srhinesUnsubmitted Done Reply Inline Actions 2 places to fix: "a offset" -> "an offset" srhines: 2 places to fix: "a offset" -> "an offset"
		rsmithUnsubmitted Done Reply Inline Actions Rather than `assert(false && XXX);`, use either `llvm_unreachable(XXX)` or change the previous case to be: else { assert(AddendIsRight && BinOpKind == BO_Sub && "operator must be ..."); rsmith: Rather than `assert(false && XXX);`, use either `llvm_unreachable(XXX)` or change the previous…
		rsmithUnsubmitted Done Reply Inline Actions The suggestion was to remove the condition in the `else if` and put the assertion inside its body, rather than duplicating it here: if (BinOpKind == BO_Add) // handle add else { assert(it's a subtract); // handle sub } rsmith: The suggestion was to remove the condition in the `else if` and put the assertion inside its…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions This is way better. Thank you. meikeb: This is way better. Thank you.

		// We add an offset to a pointer here so we should support an offset as big as
		// possible.
		if (Ov) {
		assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
		// sext() returns the widened value and leaves Offset untouched, so the
		// result has to be assigned back. Without the assignment the retry below
		// would repeat the same overflowing operation at the same width.
		Offset = Offset.sext(2 * BitWidth);
		sumUpStringLiteralOffset(Offset, Addend, BinOpKind, AddendIsRight);
		return;
		}

		Offset = ResOffset;
		}

		rsmithUnsubmitted Done Reply Inline Actions Typo "retruning" rsmith: Typo "retruning"
		namespace {
		// This is a wrapper class around StringLiteral to support offsetted string
		// literals as format strings. It takes the offset into account when returning
		// the string and its length or the source locations to display notes correctly.
		class FormatStringLiteral {
		// The wrapped literal; every query below is forwarded to it.
		const StringLiteral *FExpr;
		// Number of leading characters of FExpr that are skipped.
		int64_t Offset;

		public:
		// Wraps fexpr. Offset defaults to 0, i.e. the whole literal is used.
		FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
		: FExpr(fexpr), Offset(Offset) {}

		rsmithUnsubmitted Done Reply Inline Actions I think you can simplify this to `return FExpr->getString().drop_front(Offset);` rsmith: I think you can simplify this to `return FExpr->getString().drop_front(Offset);`
		// The literal's string with its first Offset characters dropped.
		StringRef getString() const {
		return FExpr->getString().drop_front(Offset);
		}

		// Byte length of the literal minus Offset characters of getCharByteWidth()
		// bytes each.
		unsigned getByteLength() const {
		return FExpr->getByteLength() - getCharByteWidth() * Offset;
		}
		// Length of the literal reduced by Offset.
		unsigned getLength() const { return FExpr->getLength() - Offset; }
		unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }

		// Kind, type and encoding queries do not involve the offset and are
		// forwarded unchanged.
		StringLiteral::StringKind getKind() const { return FExpr->getKind(); }

		QualType getType() const { return FExpr->getType(); }

		bool isAscii() const { return FExpr->isAscii(); }
		bool isWide() const { return FExpr->isWide(); }
		bool isUTF8() const { return FExpr->isUTF8(); }
		bool isUTF16() const { return FExpr->isUTF16(); }
		bool isUTF32() const { return FExpr->isUTF32(); }
		bool isPascal() const { return FExpr->isPascal(); }

		// ByteNo is relative to the offset string; Offset is added to it before the
		// request is forwarded to the wrapped literal.
		// NOTE(review): Offset is added unscaled here, whereas getByteLength()
		// scales it by getCharByteWidth() - confirm this is intended for literals
		// whose characters are wider than one byte.
		SourceLocation getLocationOfByte(
		unsigned ByteNo, const SourceManager &SM, const LangOptions &Features,
		const TargetInfo &Target, unsigned *StartToken = nullptr,
		unsigned *StartTokenByteOffset = nullptr) const {
		return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target,
		StartToken, StartTokenByteOffset);
		}

		// Start location of the wrapped literal advanced by Offset.
		SourceLocation getLocStart() const LLVM_READONLY {
		return FExpr->getLocStart().getLocWithOffset(Offset);
		}
		// End location of the wrapped literal, unchanged by the offset.
		SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); }
		};
		} // end anonymous namespace

		static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
		srhinesUnsubmitted Done Reply Inline Actions Is "computeStringLiteralOffset" or "calculate..." a better name here? srhines: Is "computeStringLiteralOffset" or "calculate..." a better name here?
		meikebAuthorUnsubmitted Not Done Reply Inline Actions I thought about that but decided to go with sumUp because compute or calculate sounds like this function would do what the caller of this function actually does (computing the offset). This is just a nice helper to sum up the offset we already have with another piece of offset. meikeb: I thought about that but decided to go with sumUp because compute or calculate sounds like this…
const Expr *OrigFormatExpr,		const Expr *OrigFormatExpr,
		rsmithUnsubmitted Done Reply Inline Actions I can't tell from this declaration what this function is for -- what does "reckon up" mean? rsmith: I can't tell from this declaration what this function is for -- what does "reckon up" mean?
ArrayRef<const Expr *> Args,		ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,		bool HasVAListArg, unsigned format_idx,
unsigned firstDataArg,		unsigned firstDataArg,
Sema::FormatStringType Type,		Sema::FormatStringType Type,
bool inFunctionCall,		bool inFunctionCall,
Sema::VariadicCallType CallType,		Sema::VariadicCallType CallType,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg);		UncoveredArgHandler &UncoveredArg);

// Determine if an expression is a string literal or constant string.		// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a		// If this function returns false on the arguments to a function expecting a
// format string, we will usually need to emit a warning.		// format string, we will usually need to emit a warning.
// True string literals are then checked by CheckFormatString.		// True string literals are then checked by CheckFormatString.
		srhinesUnsubmitted Not Done Reply Inline Actions It might be good to mention that Offset now goes back to the caller to allow for checking of string literal suffixes. srhines: It might be good to mention that Offset now goes back to the caller to allow for checking of…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions I'm not sure if that should be mentioned here because it is a very high level comment and the suffix of a string literal is a string literal itself. meikeb: I'm not sure if that should be mentioned here because it is a very high level comment and the…
static StringLiteralCheckType		static StringLiteralCheckType
checkFormatStringExpr(Sema &S, const Expr E, ArrayRef<const Expr > Args,		checkFormatStringExpr(Sema &S, const Expr E, ArrayRef<const Expr > Args,
bool HasVAListArg, unsigned format_idx,		bool HasVAListArg, unsigned format_idx,
unsigned firstDataArg, Sema::FormatStringType Type,		unsigned firstDataArg, Sema::FormatStringType Type,
Sema::VariadicCallType CallType, bool InFunctionCall,		Sema::VariadicCallType CallType, bool InFunctionCall,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg) {		UncoveredArgHandler &UncoveredArg,
		llvm::APSInt Offset) {
		rsmithUnsubmitted Done Reply Inline Actions Why is this passed by reference? It's just an input, not an output, right? rsmith: Why is this passed by reference? It's just an input, not an output, right?
tryAgain:		tryAgain:
		assert(Offset.isSigned() && "invalid offset");

if (E->isTypeDependent() \|\| E->isValueDependent())		if (E->isTypeDependent() \|\| E->isValueDependent())
return SLCT_NotALiteral;		return SLCT_NotALiteral;

E = E->IgnoreParenCasts();		E = E->IgnoreParenCasts();

if (E->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull))		if (E->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull))
// Technically -Wformat-nonliteral does not warn about this case.		// Technically -Wformat-nonliteral does not warn about this case.
// The behavior of printf and friends in this case is implementation		// The behavior of printf and friends in this case is implementation
Show All 17 Lines	case Stmt::ConditionalOperatorClass: {
bool Cond;		bool Cond;
if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) {		if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) {
if (Cond)		if (Cond)
CheckRight = false;		CheckRight = false;
else		else
CheckLeft = false;		CheckLeft = false;
}		}

		// We need to maintain the offsets for the right and the left hand side
		// separately to check if every possible indexed expression is a valid
		srhinesUnsubmitted Done Reply Inline Actions separately srhines: separately
		// string literal. They might have different offsets for different string
		// literals in the end.
StringLiteralCheckType Left;		StringLiteralCheckType Left;
if (!CheckLeft)		if (!CheckLeft)
Left = SLCT_UncheckedLiteral;		Left = SLCT_UncheckedLiteral;
else {		else {
Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,		Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
HasVAListArg, format_idx, firstDataArg,		HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,		Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg);		CheckedVarArgs, UncoveredArg, Offset);
if (Left == SLCT_NotALiteral \|\| !CheckRight)		if (Left == SLCT_NotALiteral \|\| !CheckRight) {
return Left;		return Left;
}		}
		}

StringLiteralCheckType Right =		StringLiteralCheckType Right =
checkFormatStringExpr(S, C->getFalseExpr(), Args,		checkFormatStringExpr(S, C->getFalseExpr(), Args,
HasVAListArg, format_idx, firstDataArg,		HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall, CheckedVarArgs,		Type, CallType, InFunctionCall, CheckedVarArgs,
UncoveredArg);		UncoveredArg, Offset);

return (CheckLeft && Left < Right) ? Left : Right;		return (CheckLeft && Left < Right) ? Left : Right;
}		}

case Stmt::ImplicitCastExprClass: {		case Stmt::ImplicitCastExprClass: {
E = cast<ImplicitCastExpr>(E)->getSubExpr();		E = cast<ImplicitCastExpr>(E)->getSubExpr();
goto tryAgain;		goto tryAgain;
}		}
Show All 36 Lines	if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
// Look through initializers like const char c[] = { "foo" }		// Look through initializers like const char c[] = { "foo" }
if (const InitListExpr *InitList = dyn_cast<InitListExpr>(Init)) {		if (const InitListExpr *InitList = dyn_cast<InitListExpr>(Init)) {
if (InitList->isStringLiteralInit())		if (InitList->isStringLiteralInit())
Init = InitList->getInit(0)->IgnoreParenImpCasts();		Init = InitList->getInit(0)->IgnoreParenImpCasts();
}		}
return checkFormatStringExpr(S, Init, Args,		return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,		HasVAListArg, format_idx,
firstDataArg, Type, CallType,		firstDataArg, Type, CallType,
/InFunctionCall/false, CheckedVarArgs,		/InFunctionCall/ false, CheckedVarArgs,
UncoveredArg);		UncoveredArg, Offset);
}		}
}		}

// For vprintf* functions (i.e., HasVAListArg==true), we add a		// For vprintf* functions (i.e., HasVAListArg==true), we add a
// special check to see if the format string is a function parameter		// special check to see if the format string is a function parameter
// of the function calling the printf function. If the function		// of the function calling the printf function. If the function
// has an attribute indicating it is a printf-like function, then we		// has an attribute indicating it is a printf-like function, then we
// should suppress warnings concerning non-literals being used in a call		// should suppress warnings concerning non-literals being used in a call
Show All 38 Lines	if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(CE->getCalleeDecl())) {
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))		if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))
if (MD->isInstance())		if (MD->isInstance())
--ArgIndex;		--ArgIndex;
const Expr *Arg = CE->getArg(ArgIndex - 1);		const Expr *Arg = CE->getArg(ArgIndex - 1);

return checkFormatStringExpr(S, Arg, Args,		return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx, firstDataArg,		HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,		Type, CallType, InFunctionCall,
CheckedVarArgs, UncoveredArg);		CheckedVarArgs, UncoveredArg, Offset);
} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {		} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
unsigned BuiltinID = FD->getBuiltinID();		unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString \|\|		if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString \|\|
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {		BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
const Expr *Arg = CE->getArg(0);		const Expr *Arg = CE->getArg(0);
return checkFormatStringExpr(S, Arg, Args,		return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx,		HasVAListArg, format_idx,
firstDataArg, Type, CallType,		firstDataArg, Type, CallType,
InFunctionCall, CheckedVarArgs,		InFunctionCall, CheckedVarArgs,
UncoveredArg);		UncoveredArg, Offset);
}		}
}		}
}		}

return SLCT_NotALiteral;		return SLCT_NotALiteral;
}		}
case Stmt::ObjCStringLiteralClass:		case Stmt::ObjCStringLiteralClass:
case Stmt::StringLiteralClass: {		case Stmt::StringLiteralClass: {
const StringLiteral *StrE = nullptr;		const StringLiteral *StrE = nullptr;

if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))		if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))
StrE = ObjCFExpr->getString();		StrE = ObjCFExpr->getString();
else		else
StrE = cast<StringLiteral>(E);		StrE = cast<StringLiteral>(E);

if (StrE) {		if (StrE) {
CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,		if (Offset.isNegative() \|\| Offset > StrE->getLength()) {
		// TODO: It would be better to have an explicit warning for out of
		// bounds literals.
		return SLCT_NotALiteral;
		}
		FormatStringLiteral FStr =
		FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue());
		rsmithUnsubmitted Done Reply Inline Actions Does this need to be heap-allocated? rsmith: Does this need to be heap-allocated?
		rsmithUnsubmitted Done Reply Inline Actions You can write this more simply as FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); rsmith: You can write this more simply as FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).
		CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx,
		rsmithUnsubmitted Done Reply Inline Actions You should presumably also do this if `Offset` is >= the length of the string literal (we want `printf` and friends to at least find the trailing nul byte). rsmith: You should presumably also do this if `Offset` is >= the length of the string literal (we want…
firstDataArg, Type, InFunctionCall, CallType,		firstDataArg, Type, InFunctionCall, CallType,
		rsmithUnsubmitted Done Reply Inline Actions This doesn't seem like it preserves enough information for the downstream code to give correct caret diagnostics pointing at locations within the string. It seems like it would be extremely complicated to maintain the necessary invariants to make that work (you'd need to create a fake string literal source buffer so that the `StringLiteralParser` can reparse it, for whichever of the string literal tokens the offset ends up within). Have you looked at how much work it'd be to feed a starting offset into `CheckFormatString` instead? rsmith: This doesn't seem like it preserves enough information for the downstream code to give correct…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions The = case is part of a different warning. It's checked in CheckFormatString. meikeb: The = case is part of a different warning. It's checked in CheckFormatString.
CheckedVarArgs, UncoveredArg);		CheckedVarArgs, UncoveredArg);
return SLCT_CheckedLiteral;		return SLCT_CheckedLiteral;
}		}

return SLCT_NotALiteral;		return SLCT_NotALiteral;
}		}
		case Stmt::BinaryOperatorClass: {
		llvm::APSInt LResult;
		llvm::APSInt RResult;

		const BinaryOperator *BinOp = cast<BinaryOperator>(E);

		// A string literal + an int offset is still a string literal.
		if (BinOp->isAdditiveOp()) {
		bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
		bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
		rsmithUnsubmitted Not Done Reply Inline Actions What happens if one of these expressions is value-dependent? The evaluator can crash or assert if given a value-dependent expression. If we don't defer these checks in dependent contexts, you'll need to handle that possibility somehow. Example: template<int N> void f(const char *p) { printf("blah blah %s" + N, p); } rsmith: What happens if one of these expressions is value-dependent? The evaluator can crash or assert…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions I think I don't understand what you are trying to tell me. Especially the example you provided does just fine and behaves as I expected. As far as I followed EvaluateAsInt it does not assert but returns false if we don't get a constexpr here. We warn under -Wformat-nonliteral for value-dependent string literals. Could you explain this more or provide an example that triggers an assert or explain what behavior is wrong regarding the provided example? Thanks! meikeb: I think I don't understand what you are trying to tell me. Especially the example you provided…
		rsmithUnsubmitted Done Reply Inline Actions We should not warn for that example, since (for instance) calling `f<0>` is fine (we should warn for `f<11>`, though, since it has no format specifiers). While `EvaluateAsInt` happens to not assert for that particular value-dependent input, it does assert for some other value-dependent cases. It's not easy for me to find you such a case, because Clang is currently careful to never call this function on a value-dependent expression, but perhaps this will trigger an assert: struct S { constexpr S(int n) : n(n) {} int n; }; template<int N> void f(const char *p) { printf("blah blah %s" + S(N).n, p); } rsmith: We should not warn for that example, since (for instance) calling `f<0>` is fine (we should…
		meikebAuthorUnsubmitted Not Done Reply Inline Actions I hope this additional check fixes this issue. As far as I read the code there were none such asserts in isIntegerConstantExpr(). Thanks for explaining this! meikeb: I hope this additional check fixes this issue. As far as I read the code there were none such…
		rsmithUnsubmitted Done Reply Inline Actions OK, I've now checked and the value-dependent case is handled up on line 3953. So just calling `EvaluateAsInt` here is fine after all. rsmith: OK, I've now checked and the value-dependent case is handled up on line 3953. So just calling…

		if (LIsInt != RIsInt) {
		BinaryOperatorKind BinOpKind = BinOp->getOpcode();

		if (LIsInt) {
		if (BinOpKind == BO_Add) {
		sumUpStringLiteralOffset(Offset, LResult, BinOpKind, RIsInt);
		E = BinOp->getRHS();
		goto tryAgain;
		}
		rsmithUnsubmitted Done Reply Inline Actions This will assert if the result doesn't fit into 64 bits, and it's not guaranteed to (if one of the operands was cast to `__int128`, for instance). You could use `getLimitedValue` instead, with some suitable limit. rsmith: This will assert if the result doesn't fit into 64 bits, and it's not guaranteed to (if one of…
		} else {
		sumUpStringLiteralOffset(Offset, RResult, BinOpKind, RIsInt);
		E = BinOp->getLHS();
		goto tryAgain;
		}
		}

		return SLCT_NotALiteral;
		}
		}
		case Stmt::UnaryOperatorClass: {
		const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
		auto ASE = dyn_cast<ArraySubscriptExpr>(UnaOp->getSubExpr());
		if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) {
		llvm::APSInt IndexResult;
		if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) {
		sumUpStringLiteralOffset(Offset, IndexResult, BO_Add, /RHS is int/ true);
		E = ASE->getBase();
		goto tryAgain;
		}
		}

		return SLCT_NotALiteral;
		}

default:		default:
return SLCT_NotALiteral;		return SLCT_NotALiteral;
}		}
}		}

Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {		Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
return llvm::StringSwitch<FormatStringType>(Format->getType()->getName())		return llvm::StringSwitch<FormatStringType>(Format->getType()->getName())
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines	bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// Format string can be either ObjC string (e.g. @"%d") or		// Format string can be either ObjC string (e.g. @"%d") or
// C string (e.g. "%d")		// C string (e.g. "%d")
// ObjC string uses the same format specifiers as C string, so we can use		// ObjC string uses the same format specifiers as C string, so we can use
// the same format string checking logic for both ObjC and C strings.		// the same format string checking logic for both ObjC and C strings.
UncoveredArgHandler UncoveredArg;		UncoveredArgHandler UncoveredArg;
StringLiteralCheckType CT =		StringLiteralCheckType CT =
checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,		checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,
format_idx, firstDataArg, Type, CallType,		format_idx, firstDataArg, Type, CallType,
/IsFunctionCall/true, CheckedVarArgs,		/IsFunctionCall/ true, CheckedVarArgs,
UncoveredArg);		UncoveredArg,
		/no string offset/ llvm::APSInt(64, false) = 0);

// Generate a diagnostic where an uncovered argument is detected.		// Generate a diagnostic where an uncovered argument is detected.
if (UncoveredArg.hasUncoveredArg()) {		if (UncoveredArg.hasUncoveredArg()) {
unsigned ArgIdx = UncoveredArg.getUncoveredArg() + firstDataArg;		unsigned ArgIdx = UncoveredArg.getUncoveredArg() + firstDataArg;
assert(ArgIdx < Args.size() && "ArgIdx outside bounds");		assert(ArgIdx < Args.size() && "ArgIdx outside bounds");
UncoveredArg.Diagnose(this, /IsFunctionCall*/true, Args[ArgIdx]);		UncoveredArg.Diagnose(this, /IsFunctionCall*/true, Args[ArgIdx]);
}		}

Show All 39 Lines	bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
}		}
return false;		return false;
}		}

namespace {		namespace {
class CheckFormatHandler : public analyze_format_string::FormatStringHandler {		class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
protected:		protected:
Sema &S;		Sema &S;
const StringLiteral *FExpr;		const FormatStringLiteral *FExpr;
const Expr *OrigFormatExpr;		const Expr *OrigFormatExpr;
const unsigned FirstDataArg;		const unsigned FirstDataArg;
const unsigned NumDataArgs;		const unsigned NumDataArgs;
const char *Beg; // Start of format string.		const char *Beg; // Start of format string.
const bool HasVAListArg;		const bool HasVAListArg;
ArrayRef<const Expr *> Args;		ArrayRef<const Expr *> Args;
unsigned FormatIdx;		unsigned FormatIdx;
llvm::SmallBitVector CoveredArgs;		llvm::SmallBitVector CoveredArgs;
bool usesPositionalArgs;		bool usesPositionalArgs;
bool atFirstArg;		bool atFirstArg;
bool inFunctionCall;		bool inFunctionCall;
Sema::VariadicCallType CallType;		Sema::VariadicCallType CallType;
llvm::SmallBitVector &CheckedVarArgs;		llvm::SmallBitVector &CheckedVarArgs;
UncoveredArgHandler &UncoveredArg;		UncoveredArgHandler &UncoveredArg;

public:		public:
CheckFormatHandler(Sema &s, const StringLiteral *fexpr,		CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,		const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,		unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,		ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,		unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType callType,		Sema::VariadicCallType callType,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg)		UncoveredArgHandler &UncoveredArg)
: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),		: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {

// Advance the end SourceLocation by one due to half-open ranges.		// Advance the end SourceLocation by one due to half-open ranges.
End = End.getLocWithOffset(1);		End = End.getLocWithOffset(1);

return CharSourceRange::getCharRange(Start, End);		return CharSourceRange::getCharRange(Start, End);
}		}

SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {		SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
return S.getLocationOfStringLiteralByte(FExpr, x - Beg);		return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(),
		S.getLangOpts(), S.Context.getTargetInfo());
}		}

void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,		void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
unsigned specifierLen){		unsigned specifierLen){
EmitFormatDiagnostic(S.PDiag(diag::warn_printf_incomplete_specifier),		EmitFormatDiagnostic(S.PDiag(diag::warn_printf_incomplete_specifier),
getLocationOfByte(startSpecifier),		getLocationOfByte(startSpecifier),
/IsStringLocation/true,		/IsStringLocation/true,
getSpecifierRange(startSpecifier, specifierLen));		getSpecifierRange(startSpecifier, specifierLen));
▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines

//===--- CHECK: Printf format string checking ------------------------------===//		//===--- CHECK: Printf format string checking ------------------------------===//

namespace {		namespace {
class CheckPrintfHandler : public CheckFormatHandler {		class CheckPrintfHandler : public CheckFormatHandler {
bool ObjCContext;		bool ObjCContext;

public:		public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,		CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,		const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, bool isObjC,		unsigned numDataArgs, bool isObjC,
const char *beg, bool hasVAListArg,		const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,		ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,		unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType CallType,		Sema::VariadicCallType CallType,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg)		UncoveredArgHandler &UncoveredArg)
▲ Show 20 Lines • Show All 770 Lines • ▼ Show 20 Lines	CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
return true;		return true;
}		}

//===--- CHECK: Scanf format string checking ------------------------------===//		//===--- CHECK: Scanf format string checking ------------------------------===//

namespace {		namespace {
class CheckScanfHandler : public CheckFormatHandler {		class CheckScanfHandler : public CheckFormatHandler {
public:		public:
CheckScanfHandler(Sema &s, const StringLiteral *fexpr,		CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,		const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, const char *beg, bool hasVAListArg,		unsigned numDataArgs, const char *beg, bool hasVAListArg,
ArrayRef<const Expr *> Args,		ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,		unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType CallType,		Sema::VariadicCallType CallType,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
UncoveredArgHandler &UncoveredArg)		UncoveredArgHandler &UncoveredArg)
: CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,		: CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines	EmitFormatDiagnostic(S.PDiag(diag)
Ex->getLocStart(),		Ex->getLocStart(),
/IsStringLocation/ false,		/IsStringLocation/ false,
getSpecifierRange(startSpecifier, specifierLen));		getSpecifierRange(startSpecifier, specifierLen));
}		}

return true;		return true;
}		}

static void CheckFormatString(Sema &S, const StringLiteral *FExpr,		static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
const Expr *OrigFormatExpr,		const Expr *OrigFormatExpr,
ArrayRef<const Expr *> Args,		ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,		bool HasVAListArg, unsigned format_idx,
unsigned firstDataArg,		unsigned firstDataArg,
Sema::FormatStringType Type,		Sema::FormatStringType Type,
bool inFunctionCall,		bool inFunctionCall,
Sema::VariadicCallType CallType,		Sema::VariadicCallType CallType,
llvm::SmallBitVector &CheckedVarArgs,		llvm::SmallBitVector &CheckedVarArgs,
▲ Show 20 Lines • Show All 5,464 Lines • Show Last 20 Lines

test/Sema/format-strings.c

	Show First 20 Lines • Show All 646 Lines • ▼ Show 20 Lines
	// <rdar://problem/14178260>			// <rdar://problem/14178260>
	extern void test_format_security_extra_args(const char*, int, ...)			extern void test_format_security_extra_args(const char*, int, ...)
	__attribute__((__format__(__printf__, 1, 3)));			__attribute__((__format__(__printf__, 1, 3)));
	void test_format_security_pos(char* string) {			void test_format_security_pos(char* string) {
	test_format_security_extra_args(string, 5); // expected-warning {{format string is not a string literal (potentially insecure)}}			test_format_security_extra_args(string, 5); // expected-warning {{format string is not a string literal (potentially insecure)}}
	// expected-note@-1{{treat the string as an argument to avoid this}}			// expected-note@-1{{treat the string as an argument to avoid this}}
	}			}
	#pragma GCC diagnostic warning "-Wformat-nonliteral"			#pragma GCC diagnostic warning "-Wformat-nonliteral"

				// Checks the format-string diagnostics for format arguments that are formed
				// by adding integer constants to, or subtracting them from, const char
				// arrays. The expected-note directives below use line-relative offsets
				// (@-1, @-2), so do not insert lines between a call, its directive and the
				// array declaration the note refers to.
				void test_char_pointer_arithmetic(int b) {
				const char s1[] = "string";
				const char s2[] = "%s string";

				printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
				// expected-note@-1{{treat the string as an argument to avoid this}}

				printf(s1 + 2); // no-warning
				printf(s2 + 2); // no-warning

				const char s3[] = "%s string";
				printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
				// expected-note@-2{{format string is defined here}}
				printf(2 + s2); // no-warning
				printf(6 + s2 - 2); // no-warning
				printf(2 + (b ? s1 : s2)); // no-warning

				const char s5[] = "string %s";
				printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
				// expected-note@-2{{format string is defined here}}
				printf(2 + (b ? s2 : s5), ""); // no-warning
				printf(2 + (b ? s1 : s2 - 2), ""); // no-warning

				const char s6[] = "%s string";
				printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
				// expected-note@-2{{format string is defined here}}
				printf(1 ? s2 + 2 : s2); // no-warning
				printf(0 ? s2 : s2 + 2); // no-warning
				printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}

				const char s7[] = "%s string %s %s";
				printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}}
				// expected-note@-2{{format string is defined here}}
				}

This is an archive of the discontinued LLVM Phabricator instance.

Do not warn about format strings that are indexed string literals.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 71199

lib/Sema/SemaChecking.cpp

test/Sema/format-strings.c

This is an archive of the discontinued LLVM Phabricator instance.

Do not warn about format strings that are indexed string literals.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 71199

lib/Sema/SemaChecking.cpp

test/Sema/format-strings.c

Do not warn about format strings that are indexed string literals.
ClosedPublic