Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -2950,6 +2950,7 @@ public: const llvm::APInt &getSize() const { return Size; } + SmallVector getAllExtents() const; const Expr *getSizeExpr() const { return ConstantArrayTypeBits.HasStoredSizeExpr ? *getTrailingObjects() Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -138,6 +138,18 @@ ArrayTypeBits.SizeModifier = sm; } +/// Return an array with extents of the declared array type. +/// +/// E.g. for `const int x[1][2][3];` returns {1,2,3}. +SmallVector ConstantArrayType::getAllExtents() const { + SmallVector Extents; + const ConstantArrayType *CAT = this; + do { + Extents.push_back(CAT->getSize().getZExtValue()); + } while ((CAT = dyn_cast(CAT->getElementType()))); + return Extents; +} + unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context, QualType ElementType, const llvm::APInt &NumElements) { Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -437,10 +437,13 @@ RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, const SubRegion *R); - Optional getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R); - Optional getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT); + Optional + getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, + const ElementRegion *R); + Optional + getSValFromInitListExpr(const InitListExpr *ILE, + const SmallVector &ConcreteOffsets, + QualType ElemT); SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, QualType ElemT); @@ -1632,8 
+1635,27 @@ } Optional RegionStoreManager::getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R) { - assert(R && VR && "Regions should not be null"); + RegionBindingsConstRef B, const ElementRegion *R) { + assert(R && "ElementRegion should not be null"); + + // Treat an n-dimensional array. Get offsets from the expression, + // like `arr[4][2][1];`, `SValOffsets` should be {1, 2, 4}; + const VarRegion *VR = dyn_cast(R->getSuperRegion()); + SmallVector SValOffsets; + SValOffsets.push_back(R->getIndex()); + if (const ElementRegion *ER = dyn_cast(R->getSuperRegion())) { + const ElementRegion *LastER = nullptr; + do { + SValOffsets.push_back(ER->getIndex()); + LastER = ER; + ER = dyn_cast(ER->getSuperRegion()); + } while (ER); + VR = dyn_cast(LastER->getSuperRegion()); + } + + // TODO: Add a test case for this check. + if (!VR) + return None; // Check if the containing array has an initialized value that we can trust. // We can trust a const value or a value of a global initializer in main(). @@ -1663,10 +1685,15 @@ if (!CAT) return None; - // Array should be one-dimensional. - // TODO: Support multidimensional array. - if (isa(CAT->getElementType())) // is multidimensional - return None; + // The number of offsets should be equal to the number of extents, + // otherwise wrong type punning occurred. For instance: + // int arr[1][2][3]; + // auto ptr = (int(*)[42])arr; + // auto x = ptr[4][2]; // UB + // TODO: Add a test case for this check. + SmallVector Extents = CAT->getAllExtents(); + if (SValOffsets.size() != Extents.size()) + return UndefinedVal(); // Array's offset should be a concrete value. // Return Unknown value if symbolic index presented. @@ -1676,21 +1703,45 @@ if (!OffsetVal.hasValue()) return UnknownVal(); - // Check offset for being out of bounds. + // Check offsets for being out of bounds. 
// C++20 [expr.add] 7.6.6.4 (excerpt): // If P points to an array element i of an array object x with n // elements, where i < 0 or i > n, the behavior is undefined. // Dereferencing is not allowed on the "one past the last // element", when i == n. // Example: - // const int arr[4] = {1, 2}; - // const int *ptr = arr; - // int x0 = ptr[0]; // 1 - // int x1 = ptr[1]; // 2 - // int x2 = ptr[2]; // 0 - // int x3 = ptr[3]; // 0 - // int x4 = ptr[4]; // UB - // int x5 = ptr[-1]; // UB + // const int arr[3][2] = {{1, 2}, {3, 4}}; + // arr[0][0]; // 1 + // arr[0][1]; // 2 + // arr[0][2]; // UB + // arr[1][0]; // 3 + // arr[1][1]; // 4 + // arr[1][-1]; // UB + // arr[2][0]; // 0 + // arr[2][1]; // 0 + // arr[-2][0]; // UB + SmallVector ConcreteOffsets; + ConcreteOffsets.resize(SValOffsets.size()); + auto ExtentIt = Extents.begin(); + auto OffsetIt = ConcreteOffsets.begin(); + // Reverse `SValOffsets` to make it consistent with `Extents`. + for (SVal &V : llvm::reverse(SValOffsets)) { + if (auto CI = V.getAs()) { + const llvm::APSInt &Offset = CI->getValue(); + const auto I = static_cast(Offset.getExtValue()); + // Check for `Offset < 0`, NOT for `I < 0`, because `Offset` CAN + // be negative, but `I` can NOT (because it's a uint64_t). + if (Offset < 0 || I >= *(ExtentIt++)) + return UndefinedVal(); + // Store index in a reverse order. + *(OffsetIt++) = I; + } else + // A symbolic index is present. Return Unknown value. + // FIXME: We also need to take ElementRegions with symbolic indexes into + // account. + return UnknownVal(); + } + const llvm::APSInt &OffsetInt = OffsetVal->getValue(); const auto Offset = static_cast(OffsetInt.getExtValue()); // Use `getZExtValue` because array extent can not be negative. 
@@ -1705,7 +1756,7 @@ // Example: // const char arr[] = { 1, 2, 3 }; if (const auto *ILE = dyn_cast(Init)) - return getSValFromInitListExpr(ILE, Offset, R->getElementType()); + return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); // Handle StringLiteral. // Example: @@ -1718,30 +1769,37 @@ return None; } -Optional -RegionStoreManager::getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT) { +Optional RegionStoreManager::getSValFromInitListExpr( + const InitListExpr *ILE, const SmallVector &ConcreteOffsets, + QualType ElemT) { assert(ILE && "InitListExpr should not be null"); - // C++20 [dcl.init.string] 9.4.2.1: - // An array of ordinary character type [...] can be initialized by [...] - // an appropriately-typed string-literal enclosed in braces. - // Example: - // const char arr[] = { "abc" }; - if (ILE->isStringLiteralInit()) - if (const auto *SL = dyn_cast(ILE->getInit(0))) - return getSValFromStringLiteral(SL, Offset, ElemT); - - // C++20 [expr.add] 9.4.17.5 (excerpt): - // i-th array element is value-initialized for each k < i ≤ n, - // where k is an expression-list size and n is an array extent. - if (Offset >= ILE->getNumInits()) - return svalBuilder.makeZeroVal(ElemT); - - // Return a constant value, if it is presented. - // FIXME: Support other SVals. - const Expr *E = ILE->getInit(Offset); - return svalBuilder.getConstantVal(E); + for (auto Offset : ConcreteOffsets) { + // C++20 [dcl.init.string] 9.4.2.1: + // An array of ordinary character type [...] can be initialized by [...] + // an appropriately-typed string-literal enclosed in braces. + // Example: + // const char arr[] = { "abc" }; + if (ILE->isStringLiteralInit()) + if (const auto *SL = dyn_cast(ILE->getInit(0))) + return getSValFromStringLiteral(SL, Offset, ElemT); + + // C++20 [expr.add] 9.4.17.5 (excerpt): + // i-th array element is value-initialized for each k < i ≤ n, + // where k is an expression-list size and n is an array extent. 
+ if (Offset >= ILE->getNumInits()) + return svalBuilder.makeZeroVal(ElemT); + + const Expr *E = ILE->getInit(Offset); + // Go to the nested initializer list. + if (const auto *IL = dyn_cast(E)) + ILE = IL; + else + // Return a constant value, if it is presented. + // FIXME: Support other SVals. + return svalBuilder.getConstantVal(E); + } + llvm_unreachable("Unknown InitListExpr construction."); } SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL, @@ -1786,8 +1844,8 @@ return UndefinedVal(); return getSValFromStringLiteral(SL, Offset, T); } - } else if (const VarRegion *VR = dyn_cast(superR)) { - if (Optional V = getConstantValFromConstArrayInitializer(B, VR, R)) + } else if (isa(superR)) { + if (Optional V = getConstantValFromConstArrayInitializer(B, R)) return *V; } Index: clang/test/Analysis/initialization.c =================================================================== --- clang/test/Analysis/initialization.c +++ clang/test/Analysis/initialization.c @@ -58,44 +58,35 @@ int res = ptr[x]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. const int glob_arr2[3][3] = {[0][0] = 1, [1][1] = 5, [2][0] = 7}; void glob_arr_index3() { - // FIXME: These all should be TRUE. - clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{UNKNOWN}} -} - -// TODO: Support multidimensional array. 
+ clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{TRUE}} +} + void negative_index() { int x = 2, y = -2; - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNDEFINED}} x = 3; y = -3; - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNDEFINED}} } -// TODO: Support multidimensional array. void glob_invalid_index3() { int x = -1, y = -1; - // FIXME: Should warn {{garbage or undefined}}. - int res = glob_arr2[x][y]; // no-warning + int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index4() { int x = 3, y = 2; - // FIXME: Should warn {{garbage or undefined}}. 
- int res = glob_arr2[x][y]; // no-warning + int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}} } const int glob_arr_no_init[10]; Index: clang/test/Analysis/initialization.cpp =================================================================== --- clang/test/Analysis/initialization.cpp +++ clang/test/Analysis/initialization.cpp @@ -14,13 +14,6 @@ clang_analyzer_eval(sarr[i].a); // expected-warning{{UNKNOWN}} } -int const arr[2][2] = {}; -void arr2init() { - int i = 1; - // FIXME: Should recognize that it is 0. - clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}} -} - int const glob_arr1[3] = {}; void glob_array_index1() { clang_analyzer_eval(glob_arr1[0] == 0); // expected-warning{{TRUE}} @@ -60,79 +53,56 @@ return glob_arr3[0]; // no-warning (garbage or undefined) } -// TODO: Support multidimensional array. int const glob_arr4[4][2] = {}; void glob_array_index2() { - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(glob_arr4[0][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{TRUE}} } -// TODO: Support multidimensional array. void glob_invalid_index3() { int idx = -42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = glob_arr4[1][idx]; // no-warning + auto x = glob_arr4[1][idx]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index4() { const int *ptr = glob_arr4[1]; int idx = -42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = ptr[idx]; // no-warning + auto x = ptr[idx]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. 
int const glob_arr5[4][2] = {{1}, 3, 4, 5}; void glob_array_index3() { - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{UNKNOWN}} -} - -// TODO: Support multidimensional array. + clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{TRUE}} +} + void glob_ptr_index2() { int const *ptr = glob_arr5[1]; - // FIXME: Should be TRUE. - clang_analyzer_eval(ptr[0] == 3); // expected-warning{{UNKNOWN}} - // FIXME: Should be TRUE. - clang_analyzer_eval(ptr[1] == 4); // expected-warning{{UNKNOWN}} - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(ptr[2] == 5); // expected-warning{{UNKNOWN}} - // FIXME: Should be UNDEFINED. 
- clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNKNOWN}} - // FIXME: Should be UNDEFINED. - clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNKNOWN}} + // FIXME: Should be TRUE + clang_analyzer_eval(ptr[0] == 3); // expected-warning{{UNDEFINED}} + // FIXME: Should be TRUE + clang_analyzer_eval(ptr[1] == 4); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[2] == 5); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNDEFINED}} } -// TODO: Support multidimensional array. void glob_invalid_index5() { int idx = -42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = glob_arr5[1][idx]; // no-warning + auto x = glob_arr5[1][idx]; // expected-warning{{garbage or undefined}} } -// TODO: Support multidimensional array. void glob_invalid_index6() { int const *ptr = &glob_arr5[1][0]; int idx = 42; - // FIXME: Should warn {{garbage or undefined}}. - auto x = ptr[idx]; // // no-warning + auto x = ptr[idx]; // // expected-warning{{garbage or undefined}} } extern const int glob_arr_no_init[10];