Index: lib/CodeGen/CGBlocks.h =================================================================== --- lib/CodeGen/CGBlocks.h +++ lib/CodeGen/CGBlocks.h @@ -159,6 +159,11 @@ EHScopeStack::stable_iterator Cleanup; CharUnits::QuantityType Offset; + /// Type of the capture field. Normally, this is identical to the type of + /// the capture's VarDecl, but can be different if there is an enclosing + /// lambda. + QualType FieldType; + public: bool isIndex() const { return (Data & 1) != 0; } bool isConstant() const { return !isIndex(); } @@ -185,10 +190,16 @@ return reinterpret_cast(Data); } - static Capture makeIndex(unsigned index, CharUnits offset) { + QualType fieldType() const { + return FieldType; + } + + static Capture makeIndex(unsigned index, CharUnits offset, + QualType FieldType) { Capture v; v.Data = (index << 1) | 1; v.Offset = offset.getQuantity(); + v.FieldType = FieldType; return v; } Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -199,13 +199,14 @@ Qualifiers::ObjCLifetime Lifetime; const BlockDecl::Capture *Capture; // null for 'this' llvm::Type *Type; + QualType FieldType; BlockLayoutChunk(CharUnits align, CharUnits size, Qualifiers::ObjCLifetime lifetime, const BlockDecl::Capture *capture, - llvm::Type *type) + llvm::Type *type, QualType fieldType) : Alignment(align), Size(size), Lifetime(lifetime), - Capture(capture), Type(type) {} + Capture(capture), Type(type), FieldType(fieldType) {} /// Tell the block info that this chunk has the given field index. void setIndex(CGBlockInfo &info, unsigned index, CharUnits offset) { @@ -213,8 +214,8 @@ info.CXXThisIndex = index; info.CXXThisOffset = offset; } else { - info.Captures.insert({Capture->getVariable(), - CGBlockInfo::Capture::makeIndex(index, offset)}); + auto C = CGBlockInfo::Capture::makeIndex(index, offset, FieldType); + info.Captures.insert({Capture->getVariable(), C}); } } }; @@ -363,7 +364,7 @@ layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, Qualifiers::OCL_None, - nullptr, llvmType)); + nullptr, llvmType, thisType)); } // Next, all the block captures. @@ -380,7 +381,7 @@ layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(), Qualifiers::OCL_None, &CI, - CGM.VoidPtrTy)); + CGM.VoidPtrTy, variable->getType())); continue; } @@ -436,6 +437,14 @@ } QualType VT = variable->getType(); + + // If the variable is captured by an enclosing block or lambda expression, + // use the type of the capture field. + if (CGF->BlockInfo && CI.isNested()) + VT = CGF->BlockInfo->getCapture(variable).fieldType(); + else if (auto *FD = CGF->LambdaCaptureFields.lookup(variable)) + VT = FD->getType(); + CharUnits size = C.getTypeSizeInChars(VT); CharUnits align = C.getDeclAlign(variable); @@ -444,7 +453,8 @@ llvm::Type *llvmType = CGM.getTypes().ConvertTypeForMem(VT); - layout.push_back(BlockLayoutChunk(align, size, lifetime, &CI, llvmType)); + layout.push_back( + BlockLayoutChunk(align, size, lifetime, &CI, llvmType, VT)); } // If that was everything, we're done here. @@ -775,7 +785,7 @@ // Ignore constant captures. if (capture.isConstant()) continue; - QualType type = variable->getType(); + QualType type = capture.fieldType(); // This will be a [[type]]*, except that a byref entry will just be // an i8**. @@ -1033,9 +1043,8 @@ variable->getName()); } - if (auto refType = variable->getType()->getAs()) { + if (auto refType = capture.fieldType()->getAs()) addr = EmitLoadOfReference(addr, refType); - } return addr; } Index: lib/Sema/SemaChecking.cpp =================================================================== --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3850,7 +3850,95 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { + Addend = Addend.zext(++AddendBitWidth); + Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { + Offset = Offset.sext(AddendBitWidth); + BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { + Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) + ResOffset = Offset.sadd_ov(Addend, Ov); + else { + assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); + ResOffset = Offset.ssub_ov(Addend, Ov); + } + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { + assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); + Offset.sext(2 * BitWidth); + sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); + return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { + return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { + return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, + const TargetInfo &Target, unsigned *StartToken = nullptr, + unsigned *StartTokenByteOffset = nullptr) const { + return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target, + StartToken, StartTokenByteOffset); + } + + SourceLocation getLocStart() const LLVM_READONLY { + return FExpr->getLocStart().getLocWithOffset(Offset); + } + SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); } +}; +} // end anonymous namespace + +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, @@ -3871,8 +3959,11 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + llvm::APSInt Offset) { tryAgain: + assert(Offset.isSigned() && "invalid offset"); + if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3906,6 +3997,10 @@ CheckLeft = false; } + // We need to maintain the offsets for the right and the left hand side + // separately to check if every possible indexed expression is a valid + // string literal. They might have different offsets for different string + // literals in the end. StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; @@ -3913,16 +4008,17 @@ Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, Offset); + if (Left == SLCT_NotALiteral || !CheckRight) { return Left; + } } StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3975,8 +4071,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4031,7 +4127,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); + CheckedVarArgs, UncoveredArg, Offset); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4041,7 +4137,7 @@ HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); } } } @@ -4058,7 +4154,13 @@ StrE = cast(E); if (StrE) { - CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, + if (Offset.isNegative() || Offset > StrE->getLength()) { + // TODO: It would be better to have an explicit warning for out of + // bounds literals. + return SLCT_NotALiteral; + } + FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, CheckedVarArgs, UncoveredArg); return SLCT_CheckedLiteral; @@ -4066,6 +4168,50 @@ return SLCT_NotALiteral; } + case Stmt::BinaryOperatorClass: { + llvm::APSInt LResult; + llvm::APSInt RResult; + + const BinaryOperator *BinOp = cast(E); + + // A string literal + an int offset is still a string literal. + if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + + if (LIsInt != RIsInt) { + BinaryOperatorKind BinOpKind = BinOp->getOpcode(); + + if (LIsInt) { + if (BinOpKind == BO_Add) { + sumOffsets(Offset, LResult, BinOpKind, RIsInt); + E = BinOp->getRHS(); + goto tryAgain; + } + } else { + sumOffsets(Offset, RResult, BinOpKind, RIsInt); + E = BinOp->getLHS(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UnaOp = cast(E); + auto ASE = dyn_cast(UnaOp->getSubExpr()); + if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) { + llvm::APSInt IndexResult; + if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) { + sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true); + E = ASE->getBase(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } default: return SLCT_NotALiteral; @@ -4132,8 +4278,9 @@ StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/true, CheckedVarArgs, - UncoveredArg); + /*IsFunctionCall*/ true, CheckedVarArgs, + UncoveredArg, + /*no string offset*/ llvm::APSInt(64, false) = 0); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -4189,7 +4336,7 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler { protected: Sema &S; - const StringLiteral *FExpr; + const FormatStringLiteral *FExpr; const Expr *OrigFormatExpr; const unsigned FirstDataArg; const unsigned NumDataArgs; @@ -4206,7 +4353,7 @@ UncoveredArgHandler &UncoveredArg; public: - CheckFormatHandler(Sema &s, const StringLiteral *fexpr, + CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -4306,7 +4453,8 @@ } SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { - return S.getLocationOfStringLiteralByte(FExpr, x - Beg); + return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(), + S.getLangOpts(), S.Context.getTargetInfo()); } void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, @@ -4645,7 +4793,7 @@ bool ObjCContext; public: - CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, + CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, bool isObjC, const char *beg, bool hasVAListArg, @@ -5432,7 +5580,7 @@ namespace { class CheckScanfHandler : public CheckFormatHandler { public: - CheckScanfHandler(Sema &s, const StringLiteral *fexpr, + CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef Args, @@ -5603,7 +5751,7 @@ return true; } -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef Args, bool HasVAListArg, unsigned format_idx, Index: test/CodeGenObjCXX/lambda-expressions.mm =================================================================== --- test/CodeGenObjCXX/lambda-expressions.mm +++ test/CodeGenObjCXX/lambda-expressions.mm @@ -4,6 +4,8 @@ typedef int (^fp)(); fp f() { auto x = []{ return 3; }; return x; } +// ARC: %[[LAMBDACLASS:.*]] = type { i32 } + // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [5 x i8] c"copy\00" // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [12 x i8] c"autorelease\00" // MRC-LABEL: define i32 ()* @_Z1fv( @@ -60,6 +62,40 @@ } @end +// ARC: define void @_ZN13LambdaCapture4foo1ERi(i32* dereferenceable(4) %{{.*}}) +// ARC: %[[CAPTURE0:.*]] = getelementptr inbounds %[[LAMBDACLASS]], %[[LAMBDACLASS]]* %{{.*}}, i32 0, i32 0 +// ARC: store i32 %{{.*}}, i32* %[[CAPTURE0]] + +// ARC: define internal void @"_ZZN13LambdaCapture4foo1ERiENK3$_3clEv"(%[[LAMBDACLASS]]* %{{.*}}) +// ARC: %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }> +// ARC: %[[CAPTURE1:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %[[BLOCK]], i32 0, i32 5 +// ARC: store i32 %{{.*}}, i32* %[[CAPTURE1]] + +// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke" +// ARC: %[[CAPTURE2:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5 +// ARC: store i32 %{{.*}}, i32* %[[CAPTURE2]] + +// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke_2"(i8* %{{.*}}) +// ARC: %[[CAPTURE3:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5 +// ARC: %[[V1:.*]] = load i32, i32* %[[CAPTURE3]] +// ARC: store i32 %[[V1]], i32* @_ZN13LambdaCapture1iE + +namespace LambdaCapture { + int i; + void foo1(int &a) { + auto lambda = [a]{ + auto block1 = ^{ + auto block2 = ^{ + i = a; + }; + block2(); + }; + block1(); + }; + lambda(); + } +} + // ARC-LABEL: define linkonce_odr i32 ()* @_ZZNK13StaticMembersIfE1fMUlvE_clEvENKUlvE_cvU13block_pointerFivEEv // Check lines for BlockInLambda test below Index: test/Sema/format-strings.c =================================================================== --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +}