diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -605,6 +605,10 @@ ProgramStateRef State, EquivalenceClass Class); + /// Remove one member from the class. + LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State, + const SymbolRef Old); + void dumpToStream(ProgramStateRef State, raw_ostream &os) const; LLVM_DUMP_METHOD void dump(ProgramStateRef State) const { dumpToStream(State, llvm::errs()); @@ -1692,29 +1696,43 @@ bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, const llvm::APSInt &Constraint) { - llvm::SmallSet SimplifiedClasses; - // Iterate over all equivalence classes and try to simplify them. - ClassMembersTy Members = State->get(); - for (std::pair ClassToSymbolSet : Members) { - EquivalenceClass Class = ClassToSymbolSet.first; - State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); - if (!State) - return false; - SimplifiedClasses.insert(Class); - } + ProgramStateRef OldState; + do { + OldState = State; + + // Iterate over all equivalence classes and try to simplify them. + ClassMembersTy Members = State->get(); + for (std::pair ClassToSymbolSet : Members) { + EquivalenceClass Class = ClassToSymbolSet.first; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + } - // Trivial equivalence classes (those that have only one symbol member) are - // not stored in the State. Thus, we must skim through the constraints as - // well. And we try to simplify symbols in the constraints. - ConstraintRangeTy Constraints = State->get(); - for (std::pair ClassConstraint : Constraints) { - EquivalenceClass Class = ClassConstraint.first; - if (SimplifiedClasses.count(Class)) // Already simplified. - continue; - State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); - if (!State) - return false; - } + // Trivial equivalence classes (those that have only one symbol member) are + // not stored in the State. Thus, we must skim through the constraints as + // well. And we try to simplify symbols in the constraints. + ConstraintRangeTy Constraints = State->get(); + for (std::pair ClassConstraint : Constraints) { + EquivalenceClass Class = ClassConstraint.first; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + } + + // We may have trivial equivalence classes in the disequality info as + // well, and we need to simplify them. + DisequalityMapTy DisequalityInfo = State->get(); + for (std::pair DisequalityEntry : + DisequalityInfo) { + EquivalenceClass Class = DisequalityEntry.first; + ClassSet DisequalClasses = DisequalityEntry.second; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + } + + } while (State != OldState); return true; } @@ -2086,6 +2104,32 @@ return llvm::None; } +LLVM_NODISCARD ProgramStateRef +EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { + + SymbolSet ClsMembers = getClassMembers(State); + assert(ClsMembers.contains(Old)); + assert(ClsMembers.getHeight() > 1 && + "Class should have at least two members"); + + // We don't remove `Old`'s Sym->Class relation for two reasons: + // 1) This way constraints for the old symbol can still be found via it's + // equivalence class that it used to be the member of. + // 2) Performance and resource reasons. We can spare one removal and thus one + // additional tree in the forest of `ClassMap`. + + // Remove `Old`'s Class->Sym relation. + SymbolSet::Factory &F = getMembersFactory(State); + ClassMembersTy::Factory &EMFactory = State->get_context(); + ClsMembers = F.remove(ClsMembers, Old); + // Overwrite the existing members assigned to this class. + ClassMembersTy ClassMembersMap = State->get(); + ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); + State = State->set(ClassMembersMap); + + return State; +} + // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence // class to this class. This way, we simplify not just the symbols but the @@ -2101,9 +2145,16 @@ // The simplified symbol should be the member of the original Class, // however, it might be in another existing class at the moment. We // have to merge these classes. + ProgramStateRef OldState = State; State = merge(F, State, MemberSym, SimplifiedMemberSym); if (!State) return nullptr; + // No state change, no merge happened actually. + if (OldState == State) + continue; + assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)); + // Remove the old and more complex symbol. + State = find(State, MemberSym).removeMember(State, MemberSym); } } return State; diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -8,14 +8,13 @@ return; if (a != d) return; - if (b != 0) + if (a != c) return; clang_analyzer_printState(); (void)(a * b * c * d); return; } - // CHECK: "equivalence_classes": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], - // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2", "reg_$3" ] - // CHECK-NEXT: ], + // CHECK: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2", "reg_$3" ] + // CHECK-NEXT: ], diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp @@ -0,0 +1,65 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check how the solver's symbol simplification mechanism +// simplifies the disequality info. + +void clang_analyzer_printState(); + +void test_contradiction(int a, int b, int c, int d) { + if (a + b + c == d) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "(reg_$0) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "(reg_$0) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + if (c != 0) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$0" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$0" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -verify + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. + +void clang_analyzer_printState(); +void clang_analyzer_warnIfReached(); + +void test_contradiction(int a, int b, int c, int d, int x) { + if (a + b + c != d) + return; + if (a == d) + return; + if (c + b != 0) + return; + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} + clang_analyzer_printState(); + + // Bring in the contradiction. + if (b != 0) + return; + + // After the simplification of `b == 0` we have: + // b == 0 + // a + c == d + // a != d + // c == 0 + // Doing another iteration we reach the fixpoint (with a contradiction): + // b == 0 + // a == d + // a != d + // c == 0 + clang_analyzer_warnIfReached(); // no-warning, i.e. UNREACHABLE + clang_analyzer_printState(); + + // Enabling expensive checks would trigger an assertion failure here without + // the fixpoint iteration. + if (a + c == x) + return; + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d * x); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. This should be done after one iteration. + +void clang_analyzer_printState(); + +void test(int a, int b, int c) { + if (a + b != c) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$1)) != (reg_$2)", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Keep the symbols and the constraints! alive. + (void)(a * b * c); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. This should be done after TWO iterations. + +void clang_analyzer_printState(); + +void test_contradiction(int a, int b, int c, int d) { + if (a + b + c != d) + return; + if (c + b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$2) + (reg_$1)", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "reg_$3" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$3)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$3" ], + // CHECK-NEXT: [ "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d); + return; +}