Index: cfe/trunk/lib/Sema/SemaChecking.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp +++ cfe/trunk/lib/Sema/SemaChecking.cpp @@ -3841,7 +3841,95 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { + Addend = Addend.zext(++AddendBitWidth); + Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { + Offset = Offset.sext(AddendBitWidth); + BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { + Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) + ResOffset = Offset.sadd_ov(Addend, Ov); + else { + assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); + ResOffset = Offset.ssub_ov(Addend, Ov); + } + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { + assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); + Offset.sext(2 * BitWidth); + sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); + return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { + return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { + return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, + const TargetInfo &Target, unsigned *StartToken = nullptr, + unsigned *StartTokenByteOffset = nullptr) const { + return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target, + StartToken, StartTokenByteOffset); + } + + SourceLocation getLocStart() const LLVM_READONLY { + return FExpr->getLocStart().getLocWithOffset(Offset); + } + SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); } +}; +} // end anonymous namespace + +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, @@ -3862,8 +3950,11 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + llvm::APSInt Offset) { tryAgain: + assert(Offset.isSigned() && "invalid offset"); + if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3897,6 +3988,10 @@ CheckLeft = false; } + // We need to maintain the offsets for the right and the left hand side + // separately to check if every possible indexed expression is a valid + // string literal. They might have different offsets for different string + // literals in the end. StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; @@ -3904,16 +3999,17 @@ Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, Offset); + if (Left == SLCT_NotALiteral || !CheckRight) { return Left; + } } StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3966,8 +4062,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4022,7 +4118,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); + CheckedVarArgs, UncoveredArg, Offset); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4032,7 +4128,7 @@ HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); } } } @@ -4049,7 +4145,13 @@ StrE = cast(E); if (StrE) { - CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, + if (Offset.isNegative() || Offset > StrE->getLength()) { + // TODO: It would be better to have an explicit warning for out of + // bounds literals. + return SLCT_NotALiteral; + } + FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, CheckedVarArgs, UncoveredArg); return SLCT_CheckedLiteral; @@ -4057,6 +4159,50 @@ return SLCT_NotALiteral; } + case Stmt::BinaryOperatorClass: { + llvm::APSInt LResult; + llvm::APSInt RResult; + + const BinaryOperator *BinOp = cast(E); + + // A string literal + an int offset is still a string literal. + if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + + if (LIsInt != RIsInt) { + BinaryOperatorKind BinOpKind = BinOp->getOpcode(); + + if (LIsInt) { + if (BinOpKind == BO_Add) { + sumOffsets(Offset, LResult, BinOpKind, RIsInt); + E = BinOp->getRHS(); + goto tryAgain; + } + } else { + sumOffsets(Offset, RResult, BinOpKind, RIsInt); + E = BinOp->getLHS(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UnaOp = cast(E); + auto ASE = dyn_cast(UnaOp->getSubExpr()); + if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) { + llvm::APSInt IndexResult; + if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) { + sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true); + E = ASE->getBase(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } default: return SLCT_NotALiteral; @@ -4123,8 +4269,9 @@ StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/true, CheckedVarArgs, - UncoveredArg); + /*IsFunctionCall*/ true, CheckedVarArgs, + UncoveredArg, + /*no string offset*/ llvm::APSInt(64, false) = 0); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -4180,7 +4327,7 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler { protected: Sema &S; - const StringLiteral *FExpr; + const FormatStringLiteral *FExpr; const Expr *OrigFormatExpr; const unsigned FirstDataArg; const unsigned NumDataArgs; @@ -4197,7 +4344,7 @@ UncoveredArgHandler &UncoveredArg; public: - CheckFormatHandler(Sema &s, const StringLiteral *fexpr, + CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -4297,7 +4444,8 @@ } SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { - return S.getLocationOfStringLiteralByte(FExpr, x - Beg); + return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(), + S.getLangOpts(), S.Context.getTargetInfo()); } void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, @@ -4636,7 +4784,7 @@ bool ObjCContext; public: - CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, + CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, bool isObjC, const char *beg, bool hasVAListArg, @@ -5423,7 +5571,7 @@ namespace { class CheckScanfHandler : public CheckFormatHandler { public: - CheckScanfHandler(Sema &s, const StringLiteral *fexpr, + CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -5594,7 +5742,7 @@ return true; } -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, Index: cfe/trunk/test/Sema/format-strings.c =================================================================== --- cfe/trunk/test/Sema/format-strings.c +++ cfe/trunk/test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +}