Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -573,6 +573,8 @@
   SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
                                          const TypedValueRegion *R,
                                          QualType Ty);
+  SVal getSValFromStringLiteralByIndex(const StringLiteral *SL,
+                                       const llvm::APSInt &Idx, QualType ElemT);
 
   SVal getLazyBinding(const SubRegion *LazyBindingRegion,
                       RegionBindingsRef LazyBinding);
@@ -1625,6 +1627,27 @@
   return Result;
 }
 
+/// Returns the code unit of \p SL at position \p Idx as an integer value of
+/// type \p ElemT. A negative index yields UndefinedVal; an index at or past
+/// SL->getLength() yields zero, which covers a string literal that
+/// initializes a larger array.
+SVal RegionStoreManager::getSValFromStringLiteralByIndex(
+    const StringLiteral *SL, const llvm::APSInt &Idx, QualType ElemT) {
+  assert(SL && "StringLiteral should not be null");
+  // If index is out of bounds, return Undef.
+  if (Idx < 0)
+    return UndefinedVal();
+  // Technically, only i == length is guaranteed to be null.
+  // However, such overflows should be caught before reaching this point;
+  // the only time such an access would be made is if a string literal was
+  // used to initialize a larger array.
+  // FIXME: Take array dimension into account to prevent exceeding its size.
+  const int64_t I = Idx.getExtValue();
+  uint32_t Code =
+      (static_cast<uint64_t>(I) >= SL->getLength()) ? 0 : SL->getCodeUnit(I);
+  return svalBuilder.makeIntVal(Code, ElemT);
+}
+
 SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
                                               const ElementRegion* R) {
   // Check if the region has a binding.
@@ -1636,26 +1659,16 @@
   // Check if the region is an element region of a string literal.
   if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) {
     // FIXME: Handle loads from strings where the literal is treated as
-    // an integer, e.g., *((unsigned int*)"hello")
+    // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according
+    // to C++20 7.2.1.11 [basic.lval].
     QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
     if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
       return UnknownVal();
 
-    const StringLiteral *Str = StrR->getStringLiteral();
     SVal Idx = R->getIndex();
     if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
-      int64_t i = CI->getValue().getSExtValue();
-      // Abort on string underrun. This can be possible by arbitrary
-      // clients of getBindingForElement().
-      if (i < 0)
-        return UndefinedVal();
-      int64_t length = Str->getLength();
-      // Technically, only i == length is guaranteed to be null.
-      // However, such overflows should be caught before reaching this point;
-      // the only time such an access would be made is if a string literal was
-      // used to initialize a larger array.
-      char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
-      return svalBuilder.makeIntVal(c, T);
+      const StringLiteral *SL = StrR->getStringLiteral();
+      return getSValFromStringLiteralByIndex(SL, CI->getValue(), T);
     }
   } else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) {
     // Check if the containing array has an initialized value that we can trust.
@@ -1665,7 +1678,13 @@
         R->getElementType().isConstQualified() ||
         (B.isMainAnalysis() && VD->hasGlobalStorage())) {
       if (const Expr *Init = VD->getAnyInitializer()) {
-        if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
+        if (const auto *SL = dyn_cast<StringLiteral>(Init)) {
+          if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
+            const llvm::APSInt &Idx = CI->getValue();
+            QualType ElemT = R->getElementType();
+            return getSValFromStringLiteralByIndex(SL, Idx, ElemT);
+          }
+        } else if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
           // The array index has to be known.
           if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
             // If it is not an array, return Undef.
Index: clang/test/Analysis/initialization.cpp
===================================================================
--- clang/test/Analysis/initialization.cpp
+++ clang/test/Analysis/initialization.cpp
@@ -128,3 +128,39 @@
   // FIXME: Should warn {{garbage or undefined}}.
   auto x = ptr[idx]; // // no-warning
 }
+
+char const glob_arr6[5] = "123";
+void glob_array_index4() {
+  clang_analyzer_eval(glob_arr6[0] == '1'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr6[1] == '2'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr6[2] == '3'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr6[3] == '\0'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr6[4] == '\0'); // expected-warning{{TRUE}}
+}
+
+void glob_ptr_index3() {
+  char const *ptr = glob_arr6;
+  clang_analyzer_eval(ptr[-42] == '\0'); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[0] == '1'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[1] == '2'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[2] == '3'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[3] == '\0'); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[4] == '\0'); // expected-warning{{TRUE}}
+  // FIXME: Should be UNDEFINED.
+  clang_analyzer_eval(ptr[5] == '\0'); // expected-warning{{TRUE}}
+  // FIXME: Should be UNDEFINED.
+  clang_analyzer_eval(ptr[6] == '\0'); // expected-warning{{TRUE}}
+}
+
+void glob_invalid_index7() {
+  int idx = -42;
+  auto x = glob_arr6[idx]; // expected-warning{{garbage or undefined}}
+}
+
+// TODO: Support multidimensional array.
+void glob_invalid_index8() {
+  const char *ptr = glob_arr6;
+  int idx = 42;
+  // FIXME: Should warn {{garbage or undefined}}.
+  auto x = ptr[idx]; // no-warning
+}