Index: include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h =================================================================== --- include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1333,7 +1333,9 @@ /// Tells that a region's contents is not changed. TK_PreserveContents = 0x1, /// Suppress pointer-escaping of a region. - TK_SuppressEscape = 0x2 + TK_SuppressEscape = 0x2, + // Do not invalidate super region. + TK_DoNotInvalidateSuperRegion = 0x4 // Do not forget to extend StorageTypeForKinds if number of traits exceed // the number of bits StorageTypeForKinds can store. Index: lib/StaticAnalyzer/Checkers/CStringChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -847,6 +847,14 @@ RegionAndSymbolInvalidationTraits::TK_PreserveContents); ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); CausesPointerEscape = true; + } else { + if (const FieldRegion *FR = dyn_cast(R)) { + // If destination buffer is a field region, do not invalidate the parent + // structure. + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + } } return state->invalidateRegions(R, E, C.blockCount(), LCtx, Index: lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- lib/StaticAnalyzer/Core/RegionStore.cpp +++ lib/StaticAnalyzer/Core/RegionStore.cpp @@ -345,7 +345,8 @@ /// regions. void populateWorkList(invalidateRegionsWorker &W, ArrayRef Values, - InvalidatedRegions *TopLevelRegions); + InvalidatedRegions *TopLevelRegions, + RegionAndSymbolInvalidationTraits &ITraits); public: RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f) @@ -715,8 +716,12 @@ return true; } - bool AddToWorkList(const MemRegion *R) { - const MemRegion *BaseR = R->getBaseRegion(); + bool AddToWorkList(const MemRegion *R, + RegionAndSymbolInvalidationTraits &ITraits) { + bool doNotInvalidateSuperRegion = ITraits.hasTrait( + R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + const MemRegion *BaseR = + doNotInvalidateSuperRegion ? R : R->getBaseRegion(); return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR)); } @@ -971,7 +976,7 @@ IS.insert(Sym); if (const MemRegion *R = V.getAsRegion()) { - AddToWorkList(R); + AddToWorkList(R, ITraits); return; } @@ -1015,7 +1020,7 @@ const VarRegion *VR = BI.getCapturedRegion(); const VarDecl *VD = VR->getDecl(); if (VD->hasAttr() || !VD->hasLocalStorage()) { - AddToWorkList(VR); + AddToWorkList(VR, ITraits); } else if (Loc::isLocType(VR->getValueType())) { // Map the current bindings to a Store to retrieve the value @@ -1026,7 +1031,7 @@ SVal V = RM.getBinding(B, loc::MemRegionVal(VR)); if (Optional L = V.getAs()) { if (const MemRegion *LR = L->getAsRegion()) - AddToWorkList(LR); + AddToWorkList(LR, ITraits); } } } @@ -1077,6 +1082,43 @@ } if (const ArrayType *AT = Ctx.getAsArrayType(T)) { + bool doNotInvalidateSuperRegion = ITraits.hasTrait( + baseR, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + + if (doNotInvalidateSuperRegion) { + // We are not doing blank invalidation of the whole array region so we + // have to manually invalidate each elements. + Optional NumElements; + + // Compute lower and upper offsets for region within array. + if (const ConstantArrayType *CAT = dyn_cast(AT)) + NumElements = CAT->getSize().getZExtValue(); + if (!NumElements) + return; + QualType ElementTy = AT->getElementType(); + uint64_t ElemSize = Ctx.getTypeSize(ElementTy); + if (!ElemSize) + return; + const RegionOffset &RO = baseR->getAsOffset(); + assert(RO.getOffset() >= 0 && "Offset should not be negative"); + uint64_t LowerOffset = RO.getOffset(); + uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize; + + // Invalidate regions which are within array boundaries. + if (const MemRegion *SuperR = baseR->getBaseRegion()) { + if (const ClusterBindings *C = B.lookup(SuperR)) { + for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; + ++I) { + uint64_t ROffset = I.getKey().getOffset(); + if (ROffset < LowerOffset || ROffset >= UpperOffset) + continue; + B = B.removeBinding(I.getKey()); + } + } + } + } + // Set the default value of the array to conjured symbol. DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, @@ -1118,7 +1160,8 @@ void RegionStoreManager::populateWorkList(invalidateRegionsWorker &W, ArrayRef Values, - InvalidatedRegions *TopLevelRegions) { + InvalidatedRegions *TopLevelRegions, + RegionAndSymbolInvalidationTraits &ITraits) { for (ArrayRef::iterator I = Values.begin(), E = Values.end(); I != E; ++I) { SVal V = *I; @@ -1132,7 +1175,7 @@ // Note: the last argument is false here because these are // non-top-level regions. if (const MemRegion *R = (*I).getAsRegion()) - W.AddToWorkList(R); + W.AddToWorkList(R, ITraits); } continue; } @@ -1140,7 +1183,7 @@ if (const MemRegion *R = V.getAsRegion()) { if (TopLevelRegions) TopLevelRegions->push_back(R); - W.AddToWorkList(R); + W.AddToWorkList(R, ITraits); continue; } } @@ -1174,7 +1217,7 @@ W.GenerateClusters(); // Add the regions to the worklist. - populateWorkList(W, Values, TopLevelRegions); + populateWorkList(W, Values, TopLevelRegions, ITraits); W.RunWorkList(); @@ -2264,14 +2307,15 @@ // If V is a region, then add it to the worklist. if (const MemRegion *R = V.getAsRegion()) { - AddToWorkList(R); + RegionAndSymbolInvalidationTraits ITraits; + AddToWorkList(R, ITraits); // All regions captured by a block are also live. if (const BlockDataRegion *BR = dyn_cast(R)) { BlockDataRegion::referenced_vars_iterator I = BR->referenced_vars_begin(), E = BR->referenced_vars_end(); for ( ; I != E; ++I) - AddToWorkList(I.getCapturedRegion()); + AddToWorkList(I.getCapturedRegion(), ITraits); } } @@ -2291,7 +2335,8 @@ I = Postponed.begin(), E = Postponed.end() ; I != E ; ++I) { if (const SymbolicRegion *SR = *I) { if (SymReaper.isLive(SR->getSymbol())) { - changed |= AddToWorkList(SR); + RegionAndSymbolInvalidationTraits ITraits; + changed |= AddToWorkList(SR, ITraits); *I = nullptr; } } @@ -2310,7 +2355,8 @@ // Enqueue the region roots onto the worklist. for (SymbolReaper::region_iterator I = SymReaper.region_begin(), E = SymReaper.region_end(); I != E; ++I) { - W.AddToWorkList(*I); + RegionAndSymbolInvalidationTraits ITraits; + W.AddToWorkList(*I, ITraits); } do W.RunWorkList(); while (W.UpdatePostponed()); Index: test/Analysis/pr22954.c =================================================================== --- /dev/null +++ test/Analysis/pr22954.c @@ -0,0 +1,266 @@ +// Given code 'struct aa { char s1[4]; char * s2;} a; memcpy(a.s1, ...);', +// this test checks that the CStringChecker only invalidates the destination buffer array a.s1 (instead of a.s1 and a.s2). +// At the moment the whole of the destination array content is invalidated. +// RUN: %clang_cc1 -analyze -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-store=region -verify %s + +typedef __typeof(sizeof(int)) size_t; + +char *strdup(const char *s); +void free(void *); +void *memcpy(void *dst, const void *src, size_t n); + +void clang_analyzer_eval(int); + +struct aa { + char s1[4]; + char *s2; +}; + +// Test different types of structure initialisation. +int f0() { + struct aa a0 = {{1, 2, 3, 4}, 0}; + a0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a0.s1, input, 4); + clang_analyzer_eval(a0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s2 == 0); // expected-warning{{UNKNOWN}} + free(a0.s2); // no warning + return 0; +} + +int f1() { + struct aa a1; + a1.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a1.s1, input, 4); + clang_analyzer_eval(a1.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s2 == 0); // expected-warning{{UNKNOWN}} + free(a1.s2); // no warning + return 0; +} + +int f2() { + struct aa a2 = {{1, 2}}; + a2.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a2.s1, input, 4); + clang_analyzer_eval(a2.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s2 == 0); // expected-warning{{UNKNOWN}} + free(a2.s2); // no warning + return 0; +} + +int f3() { + struct aa a3 = {{1, 2, 3, 4}, 0}; + a3.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + int * dest = (int*)a3.s1; + memcpy(dest, input, 4); + clang_analyzer_eval(a3.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s2 == 0); // expected-warning{{UNKNOWN}} + free(a3.s2); // no warning + return 0; +} + +struct bb { + struct aa a; + char * s2; +}; + +int f4() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + clang_analyzer_eval(b0.a.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.s2 == 0); // expected-warning{{UNKNOWN}} + free(b0.a.s2); // no warning + free(b0.s2); // no warning + return 0; +} + +int f5() { + struct aa a0 = {{1, 2, 3, 4}, 0}; + a0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a0.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a0.s2'}} +} + +int f6() { + struct aa a1; + a1.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a1.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a1.s2'}} +} + +int f7() { + struct aa a2 = {{1, 2}}; + a2.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a2.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a2.s2'}} +} + +int f8() { + struct aa a3 = {{1, 2, 3, 4}, 0}; + a3.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + int * dest = (int*)a3.s1; + memcpy(dest, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a3.s2'}} +} + +int f9() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + free(b0.a.s2); // expected-warning{{Potential leak of memory pointed to by 'b0.s2'}} + return 0; +} + +int f10() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + free(b0.s2); // expected-warning{{Potential leak of memory pointed to by 'b0.a.s2'}} + return 0; +} + +struct cc { + char * s1; + char * s2; +}; + +int f11() { + char x[4] = {1, 2}; + struct cc c0; + c0.s2 = strdup("hello"); + c0.s1 = &x[0]; + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(c0.s1, input, 4); + clang_analyzer_eval(c0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + free(c0.s2); // no-warning + return 0; +} + +struct dd { + char *s2; + char s1[4]; +}; + +int f12() { + struct dd d0 = {0, {1, 2, 3, 4}}; + d0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(d0.s1, input, 4); + clang_analyzer_eval(d0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s2 == 0); // expected-warning{{UNKNOWN}} + free(d0.s2); // no warning + return 0; +} + +struct ee { + int a; + char b; +}; + +struct EE { + struct ee s1[2]; + char * s2; +}; + +int f13() { + struct EE E0 = {{{1, 2}, {3, 4}}, 0}; + E0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(E0.s1, input, 4); + clang_analyzer_eval(E0.s1[0].a == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[0].b == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[1].a == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[1].b == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s2 == 0); // expected-warning{{UNKNOWN}} + free(E0.s2); // no warning + return 0; +} + +struct ff { + char * s1[2]; + char * s2; +}; + +int f14() { + struct ff f0 = {{0, 0}, 0}; + f0.s1[0] = strdup("hello"); + f0.s1[1] = strdup("hola"); + f0.s2 = strdup("world"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(f0.s1[0], input, 4); + memcpy(f0.s1[1], input, 4); + clang_analyzer_eval(f0.s1[0][0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s2 == 0); // expected-warning{{UNKNOWN}} + free(f0.s2); + free(f0.s1[1]); + free(f0.s1[0]); // no warning + return 0; // no warning +} + +struct aa a15 = {{1, 2, 3, 4}, 0}; + +int f15() { + a15.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a15.s1, input, 4); + clang_analyzer_eval(a15.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s2 == 0); // expected-warning{{UNKNOWN}} + free(a15.s2); // no warning + return 0; +}