Index: cfe/trunk/lib/Sema/SemaChecking.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp +++ cfe/trunk/lib/Sema/SemaChecking.cpp @@ -3841,95 +3841,7 @@ }; } // end anonymous namespace -static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, - BinaryOperatorKind BinOpKind, - bool AddendIsRight) { - unsigned BitWidth = Offset.getBitWidth(); - unsigned AddendBitWidth = Addend.getBitWidth(); - // There might be negative interim results. - if (Addend.isUnsigned()) { - Addend = Addend.zext(++AddendBitWidth); - Addend.setIsSigned(true); - } - // Adjust the bit width of the APSInts. - if (AddendBitWidth > BitWidth) { - Offset = Offset.sext(AddendBitWidth); - BitWidth = AddendBitWidth; - } else if (BitWidth > AddendBitWidth) { - Addend = Addend.sext(BitWidth); - } - - bool Ov = false; - llvm::APSInt ResOffset = Offset; - if (BinOpKind == BO_Add) - ResOffset = Offset.sadd_ov(Addend, Ov); - else { - assert(AddendIsRight && BinOpKind == BO_Sub && - "operator must be add or sub with addend on the right"); - ResOffset = Offset.ssub_ov(Addend, Ov); - } - - // We add an offset to a pointer here so we should support an offset as big as - // possible. - if (Ov) { - assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); - Offset.sext(2 * BitWidth); - sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); - return; - } - - Offset = ResOffset; -} - -namespace { -// This is a wrapper class around StringLiteral to support offsetted string -// literals as format strings. It takes the offset into account when returning -// the string and its length or the source locations to display notes correctly. -class FormatStringLiteral { - const StringLiteral *FExpr; - int64_t Offset; - - public: - FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) - : FExpr(fexpr), Offset(Offset) {} - - StringRef getString() const { - return FExpr->getString().drop_front(Offset); - } - - unsigned getByteLength() const { - return FExpr->getByteLength() - getCharByteWidth() * Offset; - } - unsigned getLength() const { return FExpr->getLength() - Offset; } - unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } - - StringLiteral::StringKind getKind() const { return FExpr->getKind(); } - - QualType getType() const { return FExpr->getType(); } - - bool isAscii() const { return FExpr->isAscii(); } - bool isWide() const { return FExpr->isWide(); } - bool isUTF8() const { return FExpr->isUTF8(); } - bool isUTF16() const { return FExpr->isUTF16(); } - bool isUTF32() const { return FExpr->isUTF32(); } - bool isPascal() const { return FExpr->isPascal(); } - - SourceLocation getLocationOfByte( - unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, - const TargetInfo &Target, unsigned *StartToken = nullptr, - unsigned *StartTokenByteOffset = nullptr) const { - return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target, - StartToken, StartTokenByteOffset); - } - - SourceLocation getLocStart() const LLVM_READONLY { - return FExpr->getLocStart().getLocWithOffset(Offset); - } - SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); } -}; -} // end anonymous namespace - -static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, +static void CheckFormatString(Sema &S, const StringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, @@ -3950,11 +3862,8 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg, - llvm::APSInt Offset) { + UncoveredArgHandler &UncoveredArg) { tryAgain: - assert(Offset.isSigned() && "invalid offset"); - if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3988,10 +3897,6 @@ CheckLeft = false; } - // We need to maintain the offsets for the right and the left hand side - // separately to check if every possible indexed expression is a valid - // string literal. They might have different offsets for different string - // literals in the end. StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; @@ -3999,17 +3904,16 @@ Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg, Offset); - if (Left == SLCT_NotALiteral || !CheckRight) { + CheckedVarArgs, UncoveredArg); + if (Left == SLCT_NotALiteral || !CheckRight) return Left; - } } StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset); + UncoveredArg); return (CheckLeft && Left < Right) ? Left : Right; } @@ -4062,8 +3966,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset); + /*InFunctionCall*/false, CheckedVarArgs, + UncoveredArg); } } @@ -4118,7 +4022,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg, Offset); + CheckedVarArgs, UncoveredArg); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4128,7 +4032,7 @@ HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset); + UncoveredArg); } } } @@ -4145,13 +4049,7 @@ StrE = cast(E); if (StrE) { - if (Offset.isNegative() || Offset > StrE->getLength()) { - // TODO: It would be better to have an explicit warning for out of - // bounds literals. - return SLCT_NotALiteral; - } - FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); - CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, + CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, CheckedVarArgs, UncoveredArg); return SLCT_CheckedLiteral; @@ -4159,50 +4057,6 @@ return SLCT_NotALiteral; } - case Stmt::BinaryOperatorClass: { - llvm::APSInt LResult; - llvm::APSInt RResult; - - const BinaryOperator *BinOp = cast(E); - - // A string literal + an int offset is still a string literal. - if (BinOp->isAdditiveOp()) { - bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); - bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); - - if (LIsInt != RIsInt) { - BinaryOperatorKind BinOpKind = BinOp->getOpcode(); - - if (LIsInt) { - if (BinOpKind == BO_Add) { - sumOffsets(Offset, LResult, BinOpKind, RIsInt); - E = BinOp->getRHS(); - goto tryAgain; - } - } else { - sumOffsets(Offset, RResult, BinOpKind, RIsInt); - E = BinOp->getLHS(); - goto tryAgain; - } - } - - return SLCT_NotALiteral; - } - } - case Stmt::UnaryOperatorClass: { - const UnaryOperator *UnaOp = cast(E); - auto ASE = dyn_cast(UnaOp->getSubExpr()); - if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) { - llvm::APSInt IndexResult; - if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) { - sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true); - E = ASE->getBase(); - goto tryAgain; - } - } - - return SLCT_NotALiteral; - } default: return SLCT_NotALiteral; @@ -4269,9 +4123,8 @@ StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/ true, CheckedVarArgs, - UncoveredArg, - /*no string offset*/ llvm::APSInt(64, false) = 0); + /*IsFunctionCall*/true, CheckedVarArgs, + UncoveredArg); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -4327,7 +4180,7 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler { protected: Sema &S; - const FormatStringLiteral *FExpr; + const StringLiteral *FExpr; const Expr *OrigFormatExpr; const unsigned FirstDataArg; const unsigned NumDataArgs; @@ -4344,7 +4197,7 @@ UncoveredArgHandler &UncoveredArg; public: - CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr, + CheckFormatHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -4444,8 +4297,7 @@ } SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { - return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(), - S.getLangOpts(), S.Context.getTargetInfo()); + return S.getLocationOfStringLiteralByte(FExpr, x - Beg); } void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, @@ -4784,7 +4636,7 @@ bool ObjCContext; public: - CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr, + CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, bool isObjC, const char *beg, bool hasVAListArg, @@ -5571,7 +5423,7 @@ namespace { class CheckScanfHandler : public CheckFormatHandler { public: - CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr, + CheckScanfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -5742,7 +5594,7 @@ return true; } -static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, +static void CheckFormatString(Sema &S, const StringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, Index: cfe/trunk/test/Sema/format-strings.c =================================================================== --- cfe/trunk/test/Sema/format-strings.c +++ cfe/trunk/test/Sema/format-strings.c @@ -652,38 +652,3 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" - -void test_char_pointer_arithmetic(int b) { - const char s1[] = "string"; - const char s2[] = "%s string"; - - printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} - // expected-note@-1{{treat the string as an argument to avoid this}} - - printf(s1 + 2); // no-warning - printf(s2 + 2); // no-warning - - const char s3[] = "%s string"; - printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} - // expected-note@-2{{format string is defined here}} - printf(2 + s2); // no-warning - printf(6 + s2 - 2); // no-warning - printf(2 + (b ? s1 : s2)); // no-warning - - const char s5[] = "string %s"; - printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} - // expected-note@-2{{format string is defined here}} - printf(2 + (b ? s2 : s5), ""); // no-warning - printf(2 + (b ? s1 : s2 - 2), ""); // no-warning - - const char s6[] = "%s string"; - printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} - // expected-note@-2{{format string is defined here}} - printf(1 ? s2 + 2 : s2); // no-warning - printf(0 ? s2 : s2 + 2); // no-warning - printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} - - const char s7[] = "%s string %s %s"; - printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} - // expected-note@-2{{format string is defined here}} -}