Index: clang/docs/ClangCommandLineReference.rst =================================================================== --- clang/docs/ClangCommandLineReference.rst +++ clang/docs/ClangCommandLineReference.rst @@ -2621,6 +2621,12 @@ .. option:: -fuse-init-array, -fno-use-init-array +.. option:: -fstrict-flex-arrays=, -fno-strict-flex-arrays + +Control which arrays are considered as flexible arrays members. +can be 1 (array of size 0, 1 and undefined are considered), 2 (array of size 0 +and undefined are considered) or 3 (only array of undefined size are considered). + .. option:: -fuse-ld= .. option:: -fuse-line-directives, -fno-use-line-directives Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -68,6 +68,11 @@ Randomizing structure layout is a C-only feature. +- Clang now supports the ``-fstrict-flex-arrays=`` option to control which + array bounds lead to flexible array members. The option yields more accurate + ``__builtin_object_size`` and ``__builtin_dynamic_object_size`` results in + most cases but may be overly conservative for some legacy code. + Bug Fixes --------- - ``CXXNewExpr::getArraySize()`` previously returned a ``llvm::Optional`` Index: clang/include/clang/AST/Expr.h =================================================================== --- clang/include/clang/AST/Expr.h +++ clang/include/clang/AST/Expr.h @@ -443,6 +443,16 @@ return (OK == OK_Ordinary || OK == OK_BitField); } + /// True when this expression refers to a flexible array member in a + /// struct. \c StrictFlexArraysLevel controls which array bounds are + /// acceptable for such arrays: + /// + /// - 0 => any array bound, + /// - 1 => [0], [1], [ ] + /// - 2 => [0], [ ] + /// - 3 => [ ] + bool isFlexibleArrayMember(ASTContext &Ctx, int StrictFlexArraysLevel) const; + /// setValueKind - Set the value kind produced by this expression. void setValueKind(ExprValueKind Cat) { ExprBits.ValueKind = Cat; } Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -422,6 +422,7 @@ LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") +LANGOPT(StrictFlexArrays, 2, 0, "Rely on strict definition of flexible arrays") COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1132,6 +1132,12 @@ def fapple_kext : Flag<["-"], "fapple-kext">, Group, Flags<[CC1Option]>, HelpText<"Use Apple's kernel extensions ABI">, MarshallingInfoFlag>; +def fstrict_flex_arrays_EQ : Joined<["-"], "fstrict-flex-arrays=">,Group, + MetaVarName<", Values<"1,2,3">, + LangOpts<"StrictFlexArrays">, + Flags<[CC1Option]>, + HelpText<"Enable optimizations based on the strict definition of flexible arrays">, + MarshallingInfoInt>; defm apple_pragma_pack : BoolFOption<"apple-pragma-pack", LangOpts<"ApplePragmaPack">, DefaultFalse, PosFlag, Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1364,6 +1364,7 @@ ~MemRegionManager(); ASTContext &getContext() { return Ctx; } + const ASTContext &getContext() const { return Ctx; } llvm::BumpPtrAllocator &getAllocator() { return A; } Index: clang/lib/AST/Expr.cpp =================================================================== --- clang/lib/AST/Expr.cpp +++ clang/lib/AST/Expr.cpp @@ -203,6 +203,91 @@ return false; } +bool Expr::isFlexibleArrayMember(ASTContext &Ctx, + int StrictFlexArraysLevel) const { + const NamedDecl *ND = nullptr; + if (const DeclRefExpr *DRE = dyn_cast(this)) + ND = DRE->getDecl(); + if (const MemberExpr *ME = dyn_cast(this)) + ND = ME->getMemberDecl(); + if (const ObjCIvarRefExpr *RE = dyn_cast(this)) + ND = RE->getDecl(); + if (!ND) + return false; + + const ConstantArrayType *ArrayTy = Ctx.getAsConstantArrayType(getType()); + + const Type *BaseType = + ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr(); + bool IsUnboundedArray = (BaseType == nullptr); + + if (!IsUnboundedArray) { + llvm::APInt Size = ArrayTy->getSize(); + + switch (StrictFlexArraysLevel) { + case 3: + return false; + case 2: + if (Size != 0) + return false; + break; + case 1: + if (Size.ugt(1)) + return false; + break; + case 0: + break; + default: + llvm_unreachable("Invalid strict flex arrays level"); + } + } + + const FieldDecl *FD = dyn_cast(ND); + if (!FD) + return false; + + // Don't consider sizes resulting from macro expansions or template argument + // substitution to form C89 tail-padded arrays. + + TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); + while (TInfo) { + TypeLoc TL = TInfo->getTypeLoc(); + // Look through typedefs. + if (TypedefTypeLoc TTL = TL.getAs()) { + const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); + TInfo = TDL->getTypeSourceInfo(); + continue; + } + if (ConstantArrayTypeLoc CTL = TL.getAs()) { + const Expr *SizeExpr = dyn_cast(CTL.getSizeExpr()); + if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) + return false; + } + break; + } + + const ObjCInterfaceDecl *ID = + dyn_cast(FD->getDeclContext()); + const RecordDecl *RD = dyn_cast(FD->getDeclContext()); + if (RD) { + if (RD->isUnion()) + return false; + if (const CXXRecordDecl *CRD = dyn_cast(RD)) { + if (!CRD->isStandardLayout()) + return false; + } + } else if (!ID) + return false; + + // See if this is the last field decl in the record. + const Decl *D = FD; + while ((D = D->getNextDeclInContext())) + if (isa(D)) + return false; + + return true; +} + const ValueDecl * Expr::getAsBuiltinConstantDeclRef(const ASTContext &Context) const { Expr::EvalResult Eval; Index: clang/lib/AST/ExprConstant.cpp =================================================================== --- clang/lib/AST/ExprConstant.cpp +++ clang/lib/AST/ExprConstant.cpp @@ -11597,9 +11597,16 @@ // conservative with the last element in structs (if it's an array), so our // current behavior is more compatible than an explicit list approach would // be. + int StrictFlexArraysLevel = Ctx.getLangOpts().StrictFlexArrays; return LVal.InvalidBase && Designator.Entries.size() == Designator.MostDerivedPathLength && Designator.MostDerivedIsArrayElement && + (Designator.isMostDerivedAnUnsizedArray() || + (Designator.getMostDerivedArraySize() == 0 && + StrictFlexArraysLevel < 3) || + (Designator.getMostDerivedArraySize() == 1 && + StrictFlexArraysLevel < 2) || + StrictFlexArraysLevel == 0) && isDesignatorAtObjectEnd(Ctx, LVal); } Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -878,44 +878,6 @@ } } -/// Determine whether this expression refers to a flexible array member in a -/// struct. We disable array bounds checks for such members. -static bool isFlexibleArrayMemberExpr(const Expr *E) { - // For compatibility with existing code, we treat arrays of length 0 or - // 1 as flexible array members. - // FIXME: This is inconsistent with the warning code in SemaChecking. Unify - // the two mechanisms. - const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); - if (const auto *CAT = dyn_cast(AT)) { - // FIXME: Sema doesn't treat [1] as a flexible array member if the bound - // was produced by macro expansion. - if (CAT->getSize().ugt(1)) - return false; - } else if (!isa(AT)) - return false; - - E = E->IgnoreParens(); - - // A flexible array member must be the last member in the class. - if (const auto *ME = dyn_cast(E)) { - // FIXME: If the base type of the member expr is not FD->getParent(), - // this should not be treated as a flexible array member access. - if (const auto *FD = dyn_cast(ME->getMemberDecl())) { - // FIXME: Sema doesn't treat a T[1] union member as a flexible array - // member, only a T[0] or T[] member gets that treatment. - if (FD->getParent()->isUnion()) - return true; - RecordDecl::field_iterator FI( - DeclContext::decl_iterator(const_cast(FD))); - return ++FI == FD->getParent()->field_end(); - } - } else if (const auto *IRE = dyn_cast(E)) { - return IRE->getDecl()->getNextIvar() == nullptr; - } - - return false; -} - llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E, QualType EltTy) { ASTContext &C = getContext(); @@ -957,8 +919,11 @@ /// If Base is known to point to the start of an array, return the length of /// that array. Return 0 if the length cannot be determined. -static llvm::Value *getArrayIndexingBound( - CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType) { +static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, + const Expr *Base, + QualType &IndexedType, + ASTContext &Context, + int StrictFlexArraysLevel) { // For the vector indexing extension, the bound is the number of elements. if (const VectorType *VT = Base->getType()->getAs()) { IndexedType = Base->getType(); @@ -969,7 +934,8 @@ if (const auto *CE = dyn_cast(Base)) { if (CE->getCastKind() == CK_ArrayToPointerDecay && - !isFlexibleArrayMemberExpr(CE->getSubExpr())) { + !CE->getSubExpr()->isFlexibleArrayMember(Context, + StrictFlexArraysLevel)) { IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast(AT)) @@ -997,7 +963,8 @@ SanitizerScope SanScope(this); QualType IndexedType; - llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); + llvm::Value *Bound = getArrayIndexingBound( + *this, Base, IndexedType, getContext(), getLangOpts().StrictFlexArrays); if (!Bound) return; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6214,6 +6214,8 @@ Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); + Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); + Args.AddLastArg(CmdArgs, options::OPT_pthread); if (Args.hasFlag(options::OPT_mspeculative_load_hardening, Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -15715,53 +15715,6 @@ << TRange << Op->getSourceRange(); } -/// Check whether this array fits the idiom of a size-one tail padded -/// array member of a struct. -/// -/// We avoid emitting out-of-bounds access warnings for such arrays as they are -/// commonly used to emulate flexible arrays in C89 code. -static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size, - const NamedDecl *ND) { - if (Size != 1 || !ND) return false; - - const FieldDecl *FD = dyn_cast(ND); - if (!FD) return false; - - // Don't consider sizes resulting from macro expansions or template argument - // substitution to form C89 tail-padded arrays. - - TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); - while (TInfo) { - TypeLoc TL = TInfo->getTypeLoc(); - // Look through typedefs. - if (TypedefTypeLoc TTL = TL.getAs()) { - const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); - TInfo = TDL->getTypeSourceInfo(); - continue; - } - if (ConstantArrayTypeLoc CTL = TL.getAs()) { - const Expr *SizeExpr = dyn_cast(CTL.getSizeExpr()); - if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) - return false; - } - break; - } - - const RecordDecl *RD = dyn_cast(FD->getDeclContext()); - if (!RD) return false; - if (RD->isUnion()) return false; - if (const CXXRecordDecl *CRD = dyn_cast(RD)) { - if (!CRD->isStandardLayout()) return false; - } - - // See if this is the last field decl in the record. - const Decl *D = FD; - while ((D = D->getNextDeclInContext())) - if (isa(D)) - return false; - return true; -} - void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ArraySubscriptExpr *ASE, bool AllowOnePastEnd, bool IndexNegated) { @@ -15914,10 +15867,9 @@ if (AllowOnePastEnd ? index.ule(size) : index.ult(size)) return; - // Also don't warn for arrays of size 1 which are members of some - // structure. These are often used to approximate flexible arrays in C89 - // code. - if (IsTailPaddedMemberArray(*this, size, ND)) + // Also don't warn for flexible array members. + if (BaseExpr->isFlexibleArrayMember(Context, + getLangOpts().StrictFlexArrays)) return; // Suppress the warning if the subscript expression (as identified by the Index: clang/lib/StaticAnalyzer/Core/MemRegion.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -789,6 +789,9 @@ if (isa(AT)) return true; + if (getContext().getLangOpts().StrictFlexArrays) + return false; + if (const auto *CAT = dyn_cast(AT)) { const llvm::APInt &Size = CAT->getSize(); if (Size.isZero()) Index: clang/test/CodeGen/bounds-checking.c =================================================================== --- clang/test/CodeGen/bounds-checking.c +++ clang/test/CodeGen/bounds-checking.c @@ -35,8 +35,9 @@ // CHECK-LABEL: define {{.*}} @f4 int f4(union U *u, int i) { - // a and b are treated as flexible array members. - // CHECK-NOT: @llvm.ubsantrap + // a and b bounds are treated as flexible array members, but they are inside a union + // and that prevent them from being considered as flexible array members. + // NONLOCAL: @llvm.ubsantrap return u->a[i] + u->b[i]; // CHECK: } } Index: clang/test/CodeGen/object-size-flex-array.c =================================================================== --- /dev/null +++ clang/test/CodeGen/object-size-flex-array.c @@ -0,0 +1,106 @@ +// RUN: %clang -fstrict-flex-arrays=3 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-3 %s +// RUN: %clang -fstrict-flex-arrays=2 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-2 %s +// RUN: %clang -fstrict-flex-arrays=1 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-1 %s +// RUN: %clang -fstrict-flex-arrays=0 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-0 %s + +#define OBJECT_SIZE_BUILTIN __builtin_object_size + +typedef struct { + float f; + double c[]; +} foo_t; + +typedef struct { + float f; + double c[0]; +} foo0_t; + +typedef struct { + float f; + double c[1]; +} foo1_t; + +typedef struct { + float f; + double c[2]; +} foo2_t; + +// CHECK-LABEL: @bar +unsigned bar(foo_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 % + // CHECK-STRICT-3: ret i32 % + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar0 +unsigned bar0(foo0_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 % + // CHECK-STRICT-3: ret i32 0 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar1 +unsigned bar1(foo1_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 8 + // CHECK-STRICT-3: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar2 +unsigned bar2(foo2_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 16 + // CHECK-STRICT-2: ret i32 16 + // CHECK-STRICT-3: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// Also checks for non-trailing flex-array like members + +typedef struct { + double c[0]; + float f; +} foofoo0_t; + +typedef struct { + double c[1]; + float f; +} foofoo1_t; + +typedef struct { + double c[2]; + float f; +} foofoo2_t; + +// CHECK-LABEL: @babar0 +unsigned babar0(foofoo0_t *f) { + // CHECK-STRICT-0: ret i32 0 + // CHECK-STRICT-1: ret i32 0 + // CHECK-STRICT-2: ret i32 0 + // CHECK-STRICT-3: ret i32 0 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @babar1 +unsigned babar1(foofoo1_t *f) { + // CHECK-STRICT-0: ret i32 8 + // CHECK-STRICT-1: ret i32 8 + // CHECK-STRICT-2: ret i32 8 + // CHECK-STRICT-3: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @babar2 +unsigned babar2(foofoo2_t *f) { + // CHECK-STRICT-0: ret i32 16 + // CHECK-STRICT-1: ret i32 16 + // CHECK-STRICT-2: ret i32 16 + // CHECK-STRICT-3: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} Index: clang/test/CodeGenObjC/ubsan-array-bounds.m =================================================================== --- clang/test/CodeGenObjC/ubsan-array-bounds.m +++ clang/test/CodeGenObjC/ubsan-array-bounds.m @@ -14,46 +14,3 @@ return FA1->chars[1]; // CHECK: } } - -@interface FlexibleArray2 { -@public - char chars[0]; -} -@end -@implementation FlexibleArray2 { -@public - char chars2[0]; -} -@end - -// CHECK-LABEL: test_FlexibleArray2_1 -char test_FlexibleArray2_1(FlexibleArray2 *FA2) { - // CHECK: !nosanitize - return FA2->chars[1]; - // CHECK: } -} - -// CHECK-LABEL: test_FlexibleArray2_2 -char test_FlexibleArray2_2(FlexibleArray2 *FA2) { - // CHECK-NOT: !nosanitize - return FA2->chars2[1]; - // CHECK: } -} - -@interface FlexibleArray3 { -@public - char chars[0]; -} -@end -@implementation FlexibleArray3 { -@public - int i; -} -@end - -// CHECK-LABEL: test_FlexibleArray3 -char test_FlexibleArray3(FlexibleArray3 *FA3) { - // CHECK: !nosanitize - return FA3->chars[1]; - // CHECK: } -} Index: clang/test/Sema/array-bounds-ptr-arith.c =================================================================== --- clang/test/Sema/array-bounds-ptr-arith.c +++ clang/test/Sema/array-bounds-ptr-arith.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -verify -Warray-bounds-pointer-arithmetic %s +// RUN: %clang_cc1 -verify -Warray-bounds-pointer-arithmetic -fstrict-flex-arrays=1 %s // Test case from PR10615 struct ext2_super_block{ @@ -12,40 +12,3 @@ { return (void *)es->s_uuid + 80; // expected-warning {{refers past the end of the array}} } - -// Test case reduced from PR11594 -struct S { int n; }; -void pr11594(struct S *s) { - int a[10]; - int *p = a - s->n; -} - -// Test case reduced from . This resulted in -// an assertion failure because of the typedef instead of an explicit -// constant array type. -struct RDar11387038 {}; -typedef struct RDar11387038 RDar11387038Array[1]; -struct RDar11387038_Table { - RDar11387038Array z; -}; -typedef struct RDar11387038_Table * TPtr; -typedef TPtr *TabHandle; -struct RDar11387038_B { TabHandle x; }; -typedef struct RDar11387038_B RDar11387038_B; - -void radar11387038(void) { - RDar11387038_B *pRDar11387038_B; - struct RDar11387038* y = &(*pRDar11387038_B->x)->z[4]; -} - -void pr51682 (void) { - int arr [1]; - switch (0) { - case 0: - break; - case 1: - asm goto (""::"r"(arr[42] >> 1)::failed); // no-warning - break; - } -failed:; -} Index: clang/test/SemaCXX/array-bounds-strict-flex-arrays.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/array-bounds-strict-flex-arrays.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -verify -fstrict-flex-arrays=3 %s + +// We cannot know for sure the size of a flexible array. +void test() { + struct { + int f; + int a[]; + } s2; + s2.a[2] = 0; // no-warning +} + +// Under -fstrict-flex-arrays `a` is not a flexible array. +void test1() { + struct { + int f; + int a[1]; // expected-note {{declared here}} + } s2; + s2.a[2] = 0; // expected-warning 1 {{array index 2 is past the end of the array (which contains 1 element)}} +}