diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -437,10 +437,13 @@ RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, const SubRegion *R); - Optional getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R); - Optional getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT); + Optional + getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, + const ElementRegion *R); + Optional + getSValFromInitListExpr(const InitListExpr *ILE, + const SmallVector &ConcreteOffsets, + QualType ElemT); SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, QualType ElemT); @@ -1631,9 +1634,127 @@ return Result; } +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of extents of the declared array type. +/// +/// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }. +static SmallVector +getConstantArrayExtents(const ConstantArrayType *CAT) { + assert(CAT && "ConstantArrayType should not be null"); + CAT = cast(CAT->getCanonicalTypeInternal()); + SmallVector Extents; + do { + Extents.push_back(CAT->getSize().getZExtValue()); + } while ((CAT = dyn_cast(CAT->getElementType()))); + return Extents; +} + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of offsets from nested ElementRegions and a root base +/// region. The array is never empty and a base region is never null. +/// +/// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }. +/// This represents an access through indirection: `arr[1][2][3];` +/// +/// \param ER The given (possibly nested) ElementRegion. +/// +/// \note The result array is in the reverse order of indirection expression: +/// arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity O(n), where n +/// is a number of indirections. It may not affect performance in real-life +/// code, though. +static std::pair, const MemRegion *> +getElementRegionOffsetsWithBase(const ElementRegion *ER) { + assert(ER && "ConstantArrayType should not be null"); + const MemRegion *Base; + SmallVector SValOffsets; + do { + SValOffsets.push_back(ER->getIndex()); + Base = ER->getSuperRegion(); + ER = dyn_cast(Base); + } while (ER); + return {SValOffsets, Base}; +} + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Convert array of offsets from `SVal` to `uint64_t` in consideration of +/// respective array extents. +/// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed +/// order (expectedly received from `getElementRegionOffsetsWithBase`). +/// \param ArrayExtents [in] The array of extents. +/// \param DstOffsets [out] The array of offsets of type `uint64_t`. +/// \returns: +/// - `None` for successful convertion. +/// - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal +/// will be returned as a suitable value of the access operation. +/// which should be returned as a correct +/// +/// \example: +/// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 } +/// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) } +/// // DstOffsets { 4, 5, 6 } +/// // returns None +/// int x2 = arr[42][5][-6]; // returns UndefinedVal +/// int x3 = arr[4][5][x2]; // returns UnknownVal +static Optional +convertOffsetsFromSvalToUnsigneds(const SmallVector &SrcOffsets, + const SmallVector ArrayExtents, + SmallVector &DstOffsets) { + // Check offsets for being out of bounds. + // C++20 [expr.add] 7.6.6.4 (excerpt): + // If P points to an array element i of an array object x with n + // elements, where i < 0 or i > n, the behavior is undefined. + // Dereferencing is not allowed on the "one past the last + // element", when i == n. + // Example: + // const int arr[3][2] = {{1, 2}, {3, 4}}; + // arr[0][0]; // 1 + // arr[0][1]; // 2 + // arr[0][2]; // UB + // arr[1][0]; // 3 + // arr[1][1]; // 4 + // arr[1][-1]; // UB + // arr[2][0]; // 0 + // arr[2][1]; // 0 + // arr[-2][0]; // UB + DstOffsets.resize(SrcOffsets.size()); + auto ExtentIt = ArrayExtents.begin(); + auto OffsetIt = DstOffsets.begin(); + // Reverse `SValOffsets` to make it consistent with `ArrayExtents`. + for (SVal V : llvm::reverse(SrcOffsets)) { + if (auto CI = V.getAs()) { + // When offset is out of array's bounds, result is UB. + const llvm::APSInt &Offset = CI->getValue(); + if (Offset.isNegative() || Offset.uge(*(ExtentIt++))) + return UndefinedVal(); + // Store index in a reversive order. + *(OffsetIt++) = Offset.getZExtValue(); + continue; + } + // Symbolic index presented. Return Unknown value. + // FIXME: We also need to take ElementRegions with symbolic indexes into + // account. + return UnknownVal(); + } + return None; +} + Optional RegionStoreManager::getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R) { - assert(R && VR && "Regions should not be null"); + RegionBindingsConstRef B, const ElementRegion *R) { + assert(R && "ElementRegion should not be null"); + + // Treat an n-dimensional array. + SmallVector SValOffsets; + const MemRegion *Base; + std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R); + const VarRegion *VR = dyn_cast(Base); + if (!VR) + return None; + + assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the " + "offsets vector is not empty."); // Check if the containing array has an initialized value that we can trust. // We can trust a const value or a value of a global initializer in main(). @@ -1664,85 +1785,91 @@ if (!CAT) return None; - // Array should be one-dimensional. - // TODO: Support multidimensional array. - if (isa(CAT->getElementType())) // is multidimensional - return None; + // Get array extents. + SmallVector Extents = getConstantArrayExtents(CAT); - // Array's offset should be a concrete value. - // Return Unknown value if symbolic index presented. - // FIXME: We also need to take ElementRegions with symbolic - // indexes into account. - const auto OffsetVal = R->getIndex().getAs(); - if (!OffsetVal.hasValue()) - return UnknownVal(); + // The number of offsets should equal to the numbers of extents, + // otherwise wrong type punning occured. For instance: + // int arr[1][2][3]; + // auto ptr = (int(*)[42])arr; + // auto x = ptr[4][2]; // UB + // FIXME: Should return UndefinedVal. + if (SValOffsets.size() != Extents.size()) + return None; - // Check offset for being out of bounds. - // C++20 [expr.add] 7.6.6.4 (excerpt): - // If P points to an array element i of an array object x with n - // elements, where i < 0 or i > n, the behavior is undefined. - // Dereferencing is not allowed on the "one past the last - // element", when i == n. - // Example: - // const int arr[4] = {1, 2}; - // const int *ptr = arr; - // int x0 = ptr[0]; // 1 - // int x1 = ptr[1]; // 2 - // int x2 = ptr[2]; // 0 - // int x3 = ptr[3]; // 0 - // int x4 = ptr[4]; // UB - // int x5 = ptr[-1]; // UB - const llvm::APSInt &OffsetInt = OffsetVal->getValue(); - const auto Offset = static_cast(OffsetInt.getExtValue()); - // Use `getZExtValue` because array extent can not be negative. - const uint64_t Extent = CAT->getSize().getZExtValue(); - // Check for `OffsetInt < 0` but NOT for `Offset < 0`, because `OffsetInt` - // CAN be negative, but `Offset` can NOT, because `Offset` is an uint64_t. - if (OffsetInt < 0 || Offset >= Extent) - return UndefinedVal(); - // From here `Offset` is in the bounds. + SmallVector ConcreteOffsets; + if (Optional V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents, + ConcreteOffsets)) + return *V; // Handle InitListExpr. // Example: - // const char arr[] = { 1, 2, 3 }; + // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; if (const auto *ILE = dyn_cast(Init)) - return getSValFromInitListExpr(ILE, Offset, R->getElementType()); + return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); // Handle StringLiteral. // Example: // const char arr[] = "abc"; if (const auto *SL = dyn_cast(Init)) - return getSValFromStringLiteral(SL, Offset, R->getElementType()); + return getSValFromStringLiteral(SL, ConcreteOffsets.front(), + R->getElementType()); // FIXME: Handle CompoundLiteralExpr. return None; } -Optional -RegionStoreManager::getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT) { +/// Returns an SVal, if possible, for the specified position of an +/// initialization list. +/// +/// \param ILE The given initialization list. +/// \param Offsets The array of unsigned offsets. E.g. for the expression +/// `int x = arr[1][2][3];` an array should be { 1, 2, 3 }. +/// \param ElemT The type of the result SVal expression. +/// \return Optional SVal for the particular position in the initialization +/// list. E.g. for the list `{{1, 2},[3, 4],{5, 6}, {}}` offsets: +/// - {1, 1} returns SVal{4}, because it's the second position in the second +/// sublist; +/// - {3, 0} returns SVal{0}, because there's no explicit value at this +/// position in the sublist. +/// +/// NOTE: Inorder to get a valid SVal, a caller shall guarantee valid offsets +/// for the given initialization list. Otherwise SVal can be an equivalent to 0 +/// or lead to assertion. +Optional RegionStoreManager::getSValFromInitListExpr( + const InitListExpr *ILE, const SmallVector &Offsets, + QualType ElemT) { assert(ILE && "InitListExpr should not be null"); - // C++20 [dcl.init.string] 9.4.2.1: - // An array of ordinary character type [...] can be initialized by [...] - // an appropriately-typed string-literal enclosed in braces. - // Example: - // const char arr[] = { "abc" }; - if (ILE->isStringLiteralInit()) - if (const auto *SL = dyn_cast(ILE->getInit(0))) - return getSValFromStringLiteral(SL, Offset, ElemT); + for (uint64_t Offset : Offsets) { + // C++20 [dcl.init.string] 9.4.2.1: + // An array of ordinary character type [...] can be initialized by [...] + // an appropriately-typed string-literal enclosed in braces. + // Example: + // const char arr[] = { "abc" }; + if (ILE->isStringLiteralInit()) + if (const auto *SL = dyn_cast(ILE->getInit(0))) + return getSValFromStringLiteral(SL, Offset, ElemT); - // C++20 [expr.add] 9.4.17.5 (excerpt): - // i-th array element is value-initialized for each k < i ≤ n, - // where k is an expression-list size and n is an array extent. - if (Offset >= ILE->getNumInits()) - return svalBuilder.makeZeroVal(ElemT); + // C++20 [expr.add] 9.4.17.5 (excerpt): + // i-th array element is value-initialized for each k < i ≤ n, + // where k is an expression-list size and n is an array extent. + if (Offset >= ILE->getNumInits()) + return svalBuilder.makeZeroVal(ElemT); - // Return a constant value, if it is presented. - // FIXME: Support other SVals. - const Expr *E = ILE->getInit(Offset); - return svalBuilder.getConstantVal(E); + const Expr *E = ILE->getInit(Offset); + const auto *IL = dyn_cast(E); + if (!IL) + // Return a constant value, if it is presented. + // FIXME: Support other SVals. + return svalBuilder.getConstantVal(E); + + // Go to the nested initializer list. + ILE = IL; + } + llvm_unreachable( + "Unhandled InitListExpr sub-expressions or invalid offsets."); } /// Returns an SVal, if possible, for the specified position in a string @@ -1804,8 +1931,8 @@ const StringLiteral *SL = StrR->getStringLiteral(); return getSValFromStringLiteral(SL, Idx.getZExtValue(), T); } - } else if (const VarRegion *VR = dyn_cast(superR)) { - if (Optional V = getConstantValFromConstArrayInitializer(B, VR, R)) + } else if (isa(superR)) { + if (Optional V = getConstantValFromConstArrayInitializer(B, R)) return *V; } diff --git a/clang/test/Analysis/initialization.c b/clang/test/Analysis/initialization.c --- a/clang/test/Analysis/initialization.c +++ b/clang/test/Analysis/initialization.c @@ -58,44 +58,35 @@ int res = ptr[x]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. const int glob_arr2[3][3] = {[0][0] = 1, [1][1] = 5, [2][0] = 7}; void glob_arr_index3() { - // FIXME: These all should be TRUE. - clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{UNKNOWN}} -} - -// TODO: Support multidimensional array. + clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{TRUE}} +} + void negative_index() { int x = 2, y = -2; - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNDEFINED}} x = 3; y = -3; - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNDEFINED}} } -// TODO: Support multidimensional array. void glob_invalid_index3() { int x = -1, y = -1; - // FIXME: Should warn {{garbage or undefined}}. - int res = glob_arr2[x][y]; // no-warning + int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index4() { int x = 3, y = 2; - // FIXME: Should warn {{garbage or undefined}}. - int res = glob_arr2[x][y]; // no-warning + int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}} } const int glob_arr_no_init[10]; diff --git a/clang/test/Analysis/initialization.cpp b/clang/test/Analysis/initialization.cpp --- a/clang/test/Analysis/initialization.cpp +++ b/clang/test/Analysis/initialization.cpp @@ -14,13 +14,6 @@ clang_analyzer_eval(sarr[i].a); // expected-warning{{UNKNOWN}} } -int const arr[2][2] = {}; -void arr2init() { - int i = 1; - // FIXME: Should recognize that it is 0. - clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}} -} - int const glob_arr1[3] = {}; void glob_array_index1() { clang_analyzer_eval(glob_arr1[0] == 0); // expected-warning{{TRUE}} @@ -60,23 +53,18 @@ return glob_arr3[0]; // no-warning (garbage or undefined) } -// TODO: Support multidimensional array. int const glob_arr4[4][2] = {}; void glob_array_index2() { - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr4[0][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{TRUE}} } -// TODO: Support multidimensional array. void glob_invalid_index3() { int idx = -42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = glob_arr4[1][idx]; // no-warning + auto x = glob_arr4[1][idx]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index4() { const int *ptr = glob_arr4[1]; int idx = -42; @@ -84,28 +72,18 @@ auto x = ptr[idx]; // no-warning } -// TODO: Support multidimensional array. int const glob_arr5[4][2] = {{1}, 3, 4, 5}; void glob_array_index3() { - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{TRUE}} } -// TODO: Support multidimensional array. void glob_ptr_index2() { int const *ptr = glob_arr5[1]; // FIXME: Should be TRUE. @@ -120,19 +98,16 @@ clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNKNOWN}} } -// TODO: Support multidimensional array. void glob_invalid_index5() { int idx = -42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = glob_arr5[1][idx]; // no-warning + auto x = glob_arr5[1][idx]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index6() { int const *ptr = &glob_arr5[1][0]; int idx = 42; // FIXME: Should warn {{garbage or undefined}}. - auto x = ptr[idx]; // // no-warning + auto x = ptr[idx]; // no-warning } extern const int glob_arr_no_init[10]; @@ -253,3 +228,31 @@ clang_analyzer_eval(glob_ptr12[2] == 'c'); // expected-warning{{TRUE}} clang_analyzer_eval(glob_ptr12[3] == '\0'); // expected-warning{{TRUE}} } + +typedef int Int; +typedef Int const CInt; +typedef CInt Arr[2]; +typedef Arr Arr2[4]; +Arr2 glob_arr8 = {{1}, 3, 4, 5}; // const int[4][2] +void glob_array_typedef1() { + clang_analyzer_eval(glob_arr8[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[0][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[1][0] == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[1][1] == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[2][0] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[2][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[3][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr8[3][1] == 0); // expected-warning{{TRUE}} +} + +const int glob_arr9[2][4] = {{(1), 2, ((3)), 4}, 5, 6, (((7)))}; +void glob_array_parentheses1() { + clang_analyzer_eval(glob_arr9[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[0][1] == 2); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[0][2] == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[0][3] == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[1][0] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[1][1] == 6); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[1][2] == 7); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr9[1][3] == 0); // expected-warning{{TRUE}} +}