Index: lib/Sema/SemaChecking.cpp =================================================================== --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3860,7 +3860,8 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + int64_t &Offset) { tryAgain: if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3895,6 +3896,11 @@ CheckLeft = false; } + // We need to maintain the offsets for the right and the left hand side + // seperately to check if every possible indexed expression is a valid + // string literal. They might have different offsets for different string + // literals in the end. + int64_t LOffset = Offset; StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; @@ -3902,16 +3908,19 @@ Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, LOffset); + if (Left == SLCT_NotALiteral || !CheckRight) { + Offset = LOffset; return Left; + } } + int64_t ROffset = Offset; StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, ROffset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3964,8 +3973,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4020,7 +4029,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); + CheckedVarArgs, UncoveredArg, Offset); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4030,7 +4039,7 @@ HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); } } } @@ -4047,6 +4056,20 @@ StrE = cast(E); if (StrE) { + if (Offset > 0) { + // We create a new string literal based on the computed fixed offset + // into the original string literal. + StrE = StringLiteral::Create(S.Context, + StrE->getString().drop_front(Offset), + StrE->getKind(), + StrE->isPascal(), + StrE->getType(), + StrE->getLocStart()); + } else if (Offset < 0) { + // TODO: It would be better to have an explicit warning for out of + // bounds literals. + return SLCT_NotALiteral; + } CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, CheckedVarArgs, UncoveredArg); @@ -4055,6 +4078,42 @@ return SLCT_NotALiteral; } + case Stmt::BinaryOperatorClass: { + llvm::APSInt LResult; + llvm::APSInt RResult; + + const BinaryOperator *BinOp = cast(E); + + // A string literal + an int offset is still a string literal. + if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + + if (LIsInt != RIsInt) { + BinaryOperatorKind BinOpKind = BinOp->getOpcode(); + + if (BinOpKind == BO_Add) { + if (LIsInt) { + Offset += LResult.getExtValue(); + E = BinOp->getRHS(); + } else { + Offset += RResult.getExtValue(); + E = BinOp->getLHS(); + } + goto tryAgain; + } else if (BinOpKind == BO_Sub) { + // We can only subtract from a pointer if we expect a literal. + if (RIsInt) { + Offset -= RResult.getExtValue(); + E = BinOp->getLHS(); + goto tryAgain; + } + } + } + + return SLCT_NotALiteral; + } + } default: return SLCT_NotALiteral; @@ -4118,11 +4177,12 @@ // ObjC string uses the same format specifiers as C string, so we can use // the same format string checking logic for both ObjC and C strings. UncoveredArgHandler UncoveredArg; + int64_t Offset = 0; StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/true, CheckedVarArgs, - UncoveredArg); + /*IsFunctionCall*/ true, CheckedVarArgs, + UncoveredArg, Offset); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { Index: test/Sema/format-strings.c =================================================================== --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,34 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} +}