Index: clang/docs/ClangCommandLineReference.rst =================================================================== --- clang/docs/ClangCommandLineReference.rst +++ clang/docs/ClangCommandLineReference.rst @@ -2639,6 +2639,12 @@ .. option:: -fuse-init-array, -fno-use-init-array +.. option:: -fstrict-flex-arrays=, -fno-strict-flex-arrays + +Control which arrays are considered as flexible arrays members. +can be 1 (array of size 0, 1 and undefined are considered) or 2 (array of size 0 +and undefined are considered). + .. option:: -fuse-ld= .. option:: -fuse-line-directives, -fno-use-line-directives Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -68,6 +68,10 @@ Randomizing structure layout is a C-only feature. +- Clang now supports the ``-fstrict-flex-arrays=`` option to control which + array bounds lead to flexible array members. The option yields more accurate + ``__builtin_object_size`` and ``__builtin_dynamic_object_size`` results in + most cases but may be overly conservative for some legacy code. - Experimental support for HLSL has been added. The implementation is incomplete and highly experimental. For more information about the ongoing work to support HLSL see the `documentation Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -424,6 +424,7 @@ LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") +LANGOPT(StrictFlexArrays, 2, 0, "Rely on strict definition of flexible arrays") COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1140,6 +1140,12 @@ def fapple_kext : Flag<["-"], "fapple-kext">, Group, Flags<[CC1Option]>, HelpText<"Use Apple's kernel extensions ABI">, MarshallingInfoFlag>; +def fstrict_flex_arrays_EQ : Joined<["-"], "fstrict-flex-arrays=">,Group, + MetaVarName<"">, Values<"0,1,2">, + LangOpts<"StrictFlexArrays">, + Flags<[CC1Option]>, + HelpText<"Enable optimizations based on the strict definition of flexible arrays">, + MarshallingInfoInt>; defm apple_pragma_pack : BoolFOption<"apple-pragma-pack", LangOpts<"ApplePragmaPack">, DefaultFalse, PosFlag, Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1364,6 +1364,7 @@ ~MemRegionManager(); ASTContext &getContext() { return Ctx; } + const ASTContext &getContext() const { return Ctx; } llvm::BumpPtrAllocator &getAllocator() { return A; } Index: clang/lib/AST/ExprConstant.cpp =================================================================== --- clang/lib/AST/ExprConstant.cpp +++ clang/lib/AST/ExprConstant.cpp @@ -11592,9 +11592,15 @@ // conservative with the last element in structs (if it's an array), so our // current behavior is more compatible than an explicit list approach would // be. + int StrictFlexArraysLevel = Ctx.getLangOpts().StrictFlexArrays; return LVal.InvalidBase && Designator.Entries.size() == Designator.MostDerivedPathLength && Designator.MostDerivedIsArrayElement && + (Designator.isMostDerivedAnUnsizedArray() || + Designator.getMostDerivedArraySize() == 0 || + (Designator.getMostDerivedArraySize() == 1 && + StrictFlexArraysLevel < 2) || + StrictFlexArraysLevel == 0) && isDesignatorAtObjectEnd(Ctx, LVal); } Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -877,7 +877,8 @@ /// Determine whether this expression refers to a flexible array member in a /// struct. We disable array bounds checks for such members. -static bool isFlexibleArrayMemberExpr(const Expr *E) { +static bool isFlexibleArrayMemberExpr(const Expr *E, + unsigned StrictFlexArraysLevel) { // For compatibility with existing code, we treat arrays of length 0 or // 1 as flexible array members. // FIXME: This is inconsistent with the warning code in SemaChecking. Unify @@ -886,6 +887,8 @@ if (const auto *CAT = dyn_cast(AT)) { // FIXME: Sema doesn't treat [1] as a flexible array member if the bound // was produced by macro expansion. + if (StrictFlexArraysLevel >= 2 && CAT->getSize().ugt(0)) + return false; if (CAT->getSize().ugt(1)) return false; } else if (!isa(AT)) @@ -900,8 +903,10 @@ if (const auto *FD = dyn_cast(ME->getMemberDecl())) { // FIXME: Sema doesn't treat a T[1] union member as a flexible array // member, only a T[0] or T[] member gets that treatment. + // Under StrictFlexArraysLevel, obey c99+ that disallows FAM in union, see + // C11 6.7.2.1 ยง18 if (FD->getParent()->isUnion()) - return true; + return StrictFlexArraysLevel < 2; RecordDecl::field_iterator FI( DeclContext::decl_iterator(const_cast(FD))); return ++FI == FD->getParent()->field_end(); @@ -954,8 +959,10 @@ /// If Base is known to point to the start of an array, return the length of /// that array. Return 0 if the length cannot be determined. -static llvm::Value *getArrayIndexingBound( - CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType) { +static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, + const Expr *Base, + QualType &IndexedType, + unsigned StrictFlexArraysLevel) { // For the vector indexing extension, the bound is the number of elements. if (const VectorType *VT = Base->getType()->getAs()) { IndexedType = Base->getType(); @@ -966,7 +973,7 @@ if (const auto *CE = dyn_cast(Base)) { if (CE->getCastKind() == CK_ArrayToPointerDecay && - !isFlexibleArrayMemberExpr(CE->getSubExpr())) { + !isFlexibleArrayMemberExpr(CE->getSubExpr(), StrictFlexArraysLevel)) { IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast(AT)) @@ -993,8 +1000,11 @@ "should not be called unless adding bounds checks"); SanitizerScope SanScope(this); + const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays; + QualType IndexedType; - llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); + llvm::Value *Bound = + getArrayIndexingBound(*this, Base, IndexedType, StrictFlexArraysLevel); if (!Bound) return; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6232,6 +6232,8 @@ Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); + Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); + Args.AddLastArg(CmdArgs, options::OPT_pthread); if (Args.hasFlag(options::OPT_mspeculative_load_hardening, Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -15787,11 +15787,26 @@ /// We avoid emitting out-of-bounds access warnings for such arrays as they are /// commonly used to emulate flexible arrays in C89 code. static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size, - const NamedDecl *ND) { - if (Size != 1 || !ND) return false; + const NamedDecl *ND, + unsigned StrictFlexArraysLevel) { + if (!ND) + return false; + + if (StrictFlexArraysLevel >= 2 && Size != 0) + return false; + + if (StrictFlexArraysLevel == 1 && Size.ule(1)) + return false; + + // FIXME: we should also allow Size = 0 here per the definition of + // StrictFlexArraysLevel, but that's backward incompatible with previous clang + // behavior. + if (StrictFlexArraysLevel == 0 && Size != 1) + return false; const FieldDecl *FD = dyn_cast(ND); - if (!FD) return false; + if (!FD) + return false; // Don't consider sizes resulting from macro expansions or template argument // substitution to form C89 tail-padded arrays. @@ -15814,10 +15829,13 @@ } const RecordDecl *RD = dyn_cast(FD->getDeclContext()); - if (!RD) return false; - if (RD->isUnion()) return false; + if (!RD) + return false; + if (RD->isUnion()) + return false; if (const CXXRecordDecl *CRD = dyn_cast(RD)) { - if (!CRD->isStandardLayout()) return false; + if (!CRD->isStandardLayout()) + return false; } // See if this is the last field decl in the record. @@ -15980,10 +15998,9 @@ if (AllowOnePastEnd ? index.ule(size) : index.ult(size)) return; - // Also don't warn for arrays of size 1 which are members of some - // structure. These are often used to approximate flexible arrays in C89 - // code. - if (IsTailPaddedMemberArray(*this, size, ND)) + // Also don't warn for Flexible Array Member emulation. + const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays; + if (IsTailPaddedMemberArray(*this, size, ND, StrictFlexArraysLevel)) return; // Suppress the warning if the subscript expression (as identified by the Index: clang/lib/StaticAnalyzer/Core/MemRegion.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -789,6 +789,9 @@ if (isa(AT)) return true; + if (getContext().getLangOpts().StrictFlexArrays >= 2) + return false; + if (const auto *CAT = dyn_cast(AT)) { const llvm::APInt &Size = CAT->getSize(); if (Size.isZero()) Index: clang/test/CodeGen/bounds-checking-fam.c =================================================================== --- clang/test/CodeGen/bounds-checking-fam.c +++ clang/test/CodeGen/bounds-checking-fam.c @@ -1,11 +1,14 @@ // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0 -// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0,CXX,CXX-STRICT-0 - -/// Before flexible array member was added to C99, many projects use a -/// one-element array as the last emember of a structure as an alternative. -/// E.g. https://github.com/python/cpython/issues/84301 -/// Suppress such errors by default. +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=0 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0 +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=0 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0,CXX,CXX-STRICT-0 +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=1 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-1 +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=1 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-1,CXX +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=2 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-2 +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=2 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-2,CXX +// Before flexible array member was added to C99, many projects use a +// one-element array as the last member of a structure as an alternative. +// E.g. https://github.com/python/cpython/issues/94250 +// Suppress such errors with -fstrict-flex-arrays=0. struct One { int a[1]; }; @@ -19,18 +22,24 @@ // CHECK-LABEL: define {{.*}} @{{.*}}test_one{{.*}}( int test_one(struct One *p, int i) { // CHECK-STRICT-0-NOT: @__ubsan + // CHECK-STRICT-1-NOT: @__ubsan + // CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort( return p->a[i] + (p->a)[i]; } // CHECK-LABEL: define {{.*}} @{{.*}}test_two{{.*}}( int test_two(struct Two *p, int i) { // CHECK-STRICT-0: call void @__ubsan_handle_out_of_bounds_abort( + // CHECK-STRICT-1: call void @__ubsan_handle_out_of_bounds_abort( + // CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort( return p->a[i] + (p->a)[i]; } // CHECK-LABEL: define {{.*}} @{{.*}}test_three{{.*}}( int test_three(struct Three *p, int i) { // CHECK-STRICT-0: call void @__ubsan_handle_out_of_bounds_abort( + // CHECK-STRICT-1: call void @__ubsan_handle_out_of_bounds_abort( + // CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort( return p->a[i] + (p->a)[i]; } Index: clang/test/CodeGen/bounds-checking-fam.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/bounds-checking-fam.cpp @@ -0,0 +1,18 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0 +// +// Disable checks on FAM even though the class doesn't have standard layout. + +struct C { + int head; +}; + +struct S : C { + int tail[1]; +}; + +// CHECK-LABEL: define {{.*}} @_Z8test_oneP1Si( +int test_one(S *p, int i) { + // CHECK-STRICT-0-NOT: @__ubsan + return p->tail[i] + (p->tail)[i]; +} Index: clang/test/CodeGen/object-size-flex-array.c =================================================================== --- /dev/null +++ clang/test/CodeGen/object-size-flex-array.c @@ -0,0 +1,98 @@ +// RUN: %clang -fstrict-flex-arrays=2 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-2 %s +// RUN: %clang -fstrict-flex-arrays=1 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-1 %s +// RUN: %clang -fstrict-flex-arrays=0 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-0 %s + +#define OBJECT_SIZE_BUILTIN __builtin_object_size + +typedef struct { + float f; + double c[]; +} foo_t; + +typedef struct { + float f; + double c[0]; +} foo0_t; + +typedef struct { + float f; + double c[1]; +} foo1_t; + +typedef struct { + float f; + double c[2]; +} foo2_t; + +// CHECK-LABEL: @bar +unsigned bar(foo_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 % + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar0 +unsigned bar0(foo0_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 % + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar1 +unsigned bar1(foo1_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 % + // CHECK-STRICT-2: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @bar2 +unsigned bar2(foo2_t *f) { + // CHECK-STRICT-0: ret i32 % + // CHECK-STRICT-1: ret i32 16 + // CHECK-STRICT-2: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// Also checks for non-trailing flex-array like members + +typedef struct { + double c[0]; + float f; +} foofoo0_t; + +typedef struct { + double c[1]; + float f; +} foofoo1_t; + +typedef struct { + double c[2]; + float f; +} foofoo2_t; + +// CHECK-LABEL: @babar0 +unsigned babar0(foofoo0_t *f) { + // CHECK-STRICT-0: ret i32 0 + // CHECK-STRICT-1: ret i32 0 + // CHECK-STRICT-2: ret i32 0 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @babar1 +unsigned babar1(foofoo1_t *f) { + // CHECK-STRICT-0: ret i32 8 + // CHECK-STRICT-1: ret i32 8 + // CHECK-STRICT-2: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CHECK-LABEL: @babar2 +unsigned babar2(foofoo2_t *f) { + // CHECK-STRICT-0: ret i32 16 + // CHECK-STRICT-1: ret i32 16 + // CHECK-STRICT-2: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} Index: clang/test/CodeGenObjC/ubsan-array-bounds.m =================================================================== --- clang/test/CodeGenObjC/ubsan-array-bounds.m +++ clang/test/CodeGenObjC/ubsan-array-bounds.m @@ -14,46 +14,3 @@ return FA1->chars[1]; // CHECK: } } - -@interface FlexibleArray2 { -@public - char chars[0]; -} -@end -@implementation FlexibleArray2 { -@public - char chars2[0]; -} -@end - -// CHECK-LABEL: test_FlexibleArray2_1 -char test_FlexibleArray2_1(FlexibleArray2 *FA2) { - // CHECK: !nosanitize - return FA2->chars[1]; - // CHECK: } -} - -// CHECK-LABEL: test_FlexibleArray2_2 -char test_FlexibleArray2_2(FlexibleArray2 *FA2) { - // CHECK-NOT: !nosanitize - return FA2->chars2[1]; - // CHECK: } -} - -@interface FlexibleArray3 { -@public - char chars[0]; -} -@end -@implementation FlexibleArray3 { -@public - int i; -} -@end - -// CHECK-LABEL: test_FlexibleArray3 -char test_FlexibleArray3(FlexibleArray3 *FA3) { - // CHECK: !nosanitize - return FA3->chars[1]; - // CHECK: } -} Index: clang/test/Sema/array-bounds-ptr-arith.c =================================================================== --- clang/test/Sema/array-bounds-ptr-arith.c +++ clang/test/Sema/array-bounds-ptr-arith.c @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -verify -Warray-bounds-pointer-arithmetic %s +// RUN: %clang_cc1 -verify=expected -Warray-bounds-pointer-arithmetic %s +// RUN: %clang_cc1 -verify=expected -Warray-bounds-pointer-arithmetic %s -fstrict-flex-arrays=0 +// RUN: %clang_cc1 -verify=expected,strict -Warray-bounds-pointer-arithmetic %s -fstrict-flex-arrays=2 // Test case from PR10615 struct ext2_super_block{ @@ -14,7 +16,9 @@ } // Test case reduced from PR11594 -struct S { int n; }; +struct S { + int n; +}; void pr11594(struct S *s) { int a[10]; int *p = a - s->n; @@ -26,26 +30,28 @@ struct RDar11387038 {}; typedef struct RDar11387038 RDar11387038Array[1]; struct RDar11387038_Table { - RDar11387038Array z; + RDar11387038Array z; // strict-note {{array 'z' declared here}} }; -typedef struct RDar11387038_Table * TPtr; +typedef struct RDar11387038_Table *TPtr; typedef TPtr *TabHandle; -struct RDar11387038_B { TabHandle x; }; +struct RDar11387038_B { + TabHandle x; +}; typedef struct RDar11387038_B RDar11387038_B; void radar11387038(void) { RDar11387038_B *pRDar11387038_B; - struct RDar11387038* y = &(*pRDar11387038_B->x)->z[4]; + struct RDar11387038 *y = &(*pRDar11387038_B->x)->z[4]; // strict-warning {{array index 4 is past the end of the array (which contains 1 element)}} } -void pr51682 (void) { - int arr [1]; +void pr51682(void) { + int arr[1]; switch (0) { - case 0: - break; - case 1: - asm goto (""::"r"(arr[42] >> 1)::failed); // no-warning - break; + case 0: + break; + case 1: + asm goto("" ::"r"(arr[42] >> 1)::failed); + break; } failed:; } Index: clang/test/SemaCXX/array-bounds-strict-flex-arrays.cpp =================================================================== --- /dev/null +++ clang/test/SemaCXX/array-bounds-strict-flex-arrays.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -verify -fstrict-flex-arrays=2 %s + +// We cannot know for sure the size of a flexible array. +struct t { + int f; + int a[]; +}; +void test(t *s2) { + s2->a[2] = 0; // no-warning +} + +// Under -fstrict-flex-arrays `a` is not a flexible array. +struct t1 { + int f; + int a[1]; // expected-note {{array 'a' declared here}} +}; +void test1(t1 *s2) { + s2->a[2] = 0; // expected-warning {{array index 2 is past the end of the array (which contains 1 element)}} +} + +// Under -fstrict-flex-arrays `a` is a flexible array. +struct t2 { + int f; + int a[0]; +}; +void test1(t2 *s2) { + s2->a[2] = 0; // no-warning +} Index: clang/test/SemaCXX/array-bounds.cpp =================================================================== --- clang/test/SemaCXX/array-bounds.cpp +++ clang/test/SemaCXX/array-bounds.cpp @@ -208,12 +208,15 @@ namespace metaprogramming { #define ONE 1 - struct foo { char c[ONE]; }; // expected-note {{declared here}} +struct foo { + char c[ONE]; // expected-note {{array 'c' declared here}} +}; + template struct bar { char c[N]; }; // expected-note {{declared here}} char test(foo *F, bar<1> *B) { return F->c[3] + // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}} - B->c[3]; // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}} + B->c[3]; // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}} } }