blob: 4b7d5d194d2714e38347e44c4c5e16589888d6f2 [file] [log] [blame]
From 16ae0cfa1e8e07e17fdc7ad93e544a828d933072 Mon Sep 17 00:00:00 2001
From: Gabor Marton <gabor.marton@ericsson.com>
Date: Wed, 1 Dec 2021 16:47:22 +0100
Subject: [PATCH] [Analyzer][solver] Simplification: Do a fixpoint iteration
before the eq class merge
This reverts commit f02c5f3478318075d1a469203900e452ba651421 and
addresses the issue mentioned in D114619 differently.
Repeating the issue here:
Currently, during symbol simplification we remove the original member
symbol from the equivalence class (`ClassMembers` trait). However, we
keep the reverse link (`ClassMap` trait), in order to be able the query
the related constraints even for the old member. This asymmetry can lead
to a problem when we merge equivalence classes:
```
ClassA: [a, b] // ClassMembers trait,
a->a, b->a // ClassMap trait, a is the representative symbol
```
Now let,s delete `a`:
```
ClassA: [b]
a->a, b->a
```
Let's merge ClassA into the trivial class `c`:
```
ClassA: [c, b]
c->c, b->c, a->a
```
Now, after the merge operation, `c` and `a` are actually in different
equivalence classes, which is inconsistent.
This issue manifests in a test case (added in D103317):
```
void recurring_symbol(int b) {
if (b * b != b)
if ((b * b) * b * b != (b * b) * b)
if (b * b == 1)
}
```
Before the simplification we have these equivalence classes:
```
trivial EQ1: [b * b != b]
trivial EQ2: [(b * b) * b * b != (b * b) * b]
```
During the simplification with `b * b == 1`, EQ1 is merged with `1 != b`
`EQ1: [b * b != b, 1 != b]` and we remove the complex symbol, so
`EQ1: [1 != b]`
Then we start to simplify the only symbol in EQ2:
`(b * b) * b * b != (b * b) * b --> 1 * b * b != 1 * b --> b * b != b`
But `b * b != b` is such a symbol that had been removed previously from
EQ1, thus we reach the above mentioned inconsistency.
This patch addresses the issue by making it impossible to synthesise a
symbol that had been simplified before. We achieve this by simplifying
the given symbol to the absolute simplest form.
Differential Revision: https://reviews.llvm.org/D114887
---
.../Core/RangeConstraintManager.cpp | 93 ++++++++++++++++---
.../expr-inspection-printState-eq-classes.c | 4 +-
...symbol-simplification-disequality-info.cpp | 57 ++++++------
...-simplification-fixpoint-one-iteration.cpp | 17 ++--
...simplification-fixpoint-two-iterations.cpp | 25 +++--
5 files changed, 128 insertions(+), 68 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 3f8d9e4298a0..23c67c64f975 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -601,6 +601,10 @@ public:
LLVM_NODISCARD static inline Optional<bool>
areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
+ /// Remove one member from the class.
+ LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
+ const SymbolRef Old);
+
/// Iterate over all symbols and try to simplify them.
LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
RangeSet::Factory &F,
@@ -2132,6 +2136,34 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
return llvm::None;
}
+LLVM_NODISCARD ProgramStateRef
+EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
+
+ SymbolSet ClsMembers = getClassMembers(State);
+ assert(ClsMembers.contains(Old));
+
+ // We don't remove `Old`'s Sym->Class relation for two reasons:
+ // 1) This way constraints for the old symbol can still be found via it's
+ // equivalence class that it used to be the member of.
+ // 2) Performance and resource reasons. We can spare one removal and thus one
+ // additional tree in the forest of `ClassMap`.
+
+ // Remove `Old`'s Class->Sym relation.
+ SymbolSet::Factory &F = getMembersFactory(State);
+ ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
+ ClsMembers = F.remove(ClsMembers, Old);
+ // Ensure another precondition of the removeMember function (we can check
+ // this only with isEmpty, thus we have to do the remove first).
+ assert(!ClsMembers.isEmpty() &&
+ "Class should have had at least two members before member removal");
+ // Overwrite the existing members assigned to this class.
+ ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
+ ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
+ State = State->set<ClassMembers>(ClassMembersMap);
+
+ return State;
+}
+
// Re-evaluate an SVal with top-level `State->assume` logic.
LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
const RangeSet *Constraint,
@@ -2159,6 +2191,42 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
Constraint->getMaxValue(), true);
}
+// Simplify the given symbol with the help of the SValBuilder. In
+// SValBuilder::symplifySval, we traverse the symbol tree and query the
+// constraint values for the sub-trees and if a value is a constant we do the
+// constant folding. Compound symbols might collapse to simpler symbol tree
+// that is still possible to further simplify. Thus, we do the simplification on
+// a new symbol tree until we reach the simplest form, i.e. the fixpoint.
+//
+// Consider the following symbol `(b * b) * b * b` which has this tree:
+// *
+// / \
+// * b
+// / \
+// / b
+// (b * b)
+// Now, if the `b * b == 1` new constraint is added then during the first
+// iteration we have the following transformations:
+// * *
+// / \ / \
+// * b --> b b
+// / \
+// / b
+// 1
+// We need another iteration to reach the final result `1`.
+LLVM_NODISCARD
+static SVal simplifyUntilFixpoint(SValBuilder &SVB, ProgramStateRef State,
+ const SymbolRef Sym) {
+ SVal Val = SVB.makeSymbolVal(Sym);
+ SVal SimplifiedVal = SVB.simplifySVal(State, Val);
+ // Do the simplification until we can.
+ while (SimplifiedVal != Val) {
+ Val = SimplifiedVal;
+ SimplifiedVal = SVB.simplifySVal(State, Val);
+ }
+ return SimplifiedVal;
+}
+
// Iterate over all symbols and try to simplify them. Once a symbol is
// simplified then we check if we can merge the simplified symbol's equivalence
// class to this class. This way, we simplify not just the symbols but the
@@ -2170,7 +2238,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
SymbolSet ClassMembers = Class.getClassMembers(State);
for (const SymbolRef &MemberSym : ClassMembers) {
- const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
+ const SVal SimplifiedMemberVal =
+ simplifyUntilFixpoint(SVB, State, MemberSym);
const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
// The symbol is collapsed to a constant, check if the current State is
@@ -2196,6 +2265,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
continue;
assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
+ // Remove the old and more complex symbol.
+ State = find(State, MemberSym).removeMember(State, MemberSym);
// Query the class constraint again b/c that may have changed during the
// merge above.
@@ -2207,25 +2278,19 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
// About performance and complexity: Let us assume that in a State we
// have N non-trivial equivalence classes and that all constraints and
// disequality info is related to non-trivial classes. In the worst case,
- // we can simplify only one symbol of one class in each iteration.
+ // we can simplify only one symbol of one class in each iteration. The
+ // number of symbols in one class cannot grow b/c we replace the old
+ // symbol with the simplified one. Also, the number of the equivalence
+ // classes can decrease only, b/c the algorithm does a merge operation
+ // optionally. We need N iterations in this case to reach the fixpoint.
+ // Thus, the steps needed to be done in the worst case is proportional to
+ // N*N.
//
- // The number of the equivalence classes can decrease only, because the
- // algorithm does a merge operation optionally.
- // ASSUMPTION G: Let us assume that the
- // number of symbols in one class cannot grow because we replace the old
- // symbol with the simplified one.
- // If assumption G holds then we need N iterations in this case to reach
- // the fixpoint. Thus, the steps needed to be done in the worst case is
- // proportional to N*N.
// This worst case scenario can be extended to that case when we have
// trivial classes in the constraints and in the disequality map. This
// case can be reduced to the case with a State where there are only
// non-trivial classes. This is because a merge operation on two trivial
// classes results in one non-trivial class.
- //
- // Empirical measurements show that if we relax assumption G (i.e. if we
- // do not replace the complex symbol just add the simplified one) then
- // the runtime and memory consumption does not grow noticeably.
State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
if (!State)
return nullptr;
diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c
index c56fcd627b0a..08a1c6cbd60b 100644
--- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c
+++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c
@@ -16,6 +16,6 @@ void test_equivalence_classes(int a, int b, int c, int d) {
}
// CHECK: "equivalence_classes": [
-// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "(reg_$0<int a>) != (reg_$2<int c>)" ],
-// CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
+// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
+// CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
// CHECK-NEXT: ],
diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp
index 45d8c05bcfc5..69238b583eb8 100644
--- a/clang/test/Analysis/symbol-simplification-disequality-info.cpp
+++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp
@@ -12,18 +12,18 @@ void test(int a, int b, int c, int d) {
if (a + b + c == d)
return;
clang_analyzer_printState();
- // CHECK: "disequality_info": [
- // CHECK-NEXT: {
- // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ],
- // CHECK-NEXT: "disequal_to": [
- // CHECK-NEXT: [ "reg_$3<int d>" ]]
- // CHECK-NEXT: },
- // CHECK-NEXT: {
- // CHECK-NEXT: "class": [ "reg_$3<int d>" ],
- // CHECK-NEXT: "disequal_to": [
- // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]]
- // CHECK-NEXT: }
- // CHECK-NEXT: ],
+ // CHECK: "disequality_info": [
+ // CHECK-NEXT: {
+ // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ],
+ // CHECK-NEXT: "disequal_to": [
+ // CHECK-NEXT: [ "reg_$3<int d>" ]]
+ // CHECK-NEXT: },
+ // CHECK-NEXT: {
+ // CHECK-NEXT: "class": [ "reg_$3<int d>" ],
+ // CHECK-NEXT: "disequal_to": [
+ // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]]
+ // CHECK-NEXT: }
+ // CHECK-NEXT: ],
// Simplification starts here.
@@ -32,33 +32,32 @@ void test(int a, int b, int c, int d) {
clang_analyzer_printState();
// CHECK: "disequality_info": [
// CHECK-NEXT: {
- // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)" ],
+ // CHECK-NEXT: "class": [ "(reg_$0<int a>) + (reg_$2<int c>)" ],
// CHECK-NEXT: "disequal_to": [
// CHECK-NEXT: [ "reg_$3<int d>" ]]
// CHECK-NEXT: },
// CHECK-NEXT: {
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
// CHECK-NEXT: "disequal_to": [
- // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)" ]]
- // CHECK-NEXT: }
- // CHECK-NEXT: ],
+ // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$2<int c>)" ]]
+ // CHECK-NEXT: }
+ // CHECK-NEXT: ],
if (c != 0)
return;
clang_analyzer_printState();
-
- // CHECK: "disequality_info": [
- // CHECK-NEXT: {
- // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>" ],
- // CHECK-NEXT: "disequal_to": [
- // CHECK-NEXT: [ "reg_$3<int d>" ]]
- // CHECK-NEXT: },
- // CHECK-NEXT: {
- // CHECK-NEXT: "class": [ "reg_$3<int d>" ],
- // CHECK-NEXT: "disequal_to": [
- // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>" ]]
- // CHECK-NEXT: }
- // CHECK-NEXT: ],
+ // CHECK: "disequality_info": [
+ // CHECK-NEXT: {
+ // CHECK-NEXT: "class": [ "reg_$0<int a>" ],
+ // CHECK-NEXT: "disequal_to": [
+ // CHECK-NEXT: [ "reg_$3<int d>" ]]
+ // CHECK-NEXT: },
+ // CHECK-NEXT: {
+ // CHECK-NEXT: "class": [ "reg_$3<int d>" ],
+ // CHECK-NEXT: "disequal_to": [
+ // CHECK-NEXT: [ "reg_$0<int a>" ]]
+ // CHECK-NEXT: }
+ // CHECK-NEXT: ],
// Keep the symbols and the constraints! alive.
(void)(a * b * c * d);
diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
index aec0da1b82f1..73922d420a8c 100644
--- a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
+++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
@@ -24,15 +24,14 @@ void test(int a, int b, int c) {
if (b != 0)
return;
clang_analyzer_printState();
- // CHECK: "constraints": [
- // CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }
- // CHECK-NEXT: ],
- // CHECK-NEXT: "equivalence_classes": [
- // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "(reg_$0<int a>) != (reg_$2<int c>)" ],
- // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$0<int a>", "reg_$2<int c>" ]
- // CHECK-NEXT: ],
+ // CHECK: "constraints": [
+ // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
+ // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }
+ // CHECK-NEXT: ],
+ // CHECK-NEXT: "equivalence_classes": [
+ // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
+ // CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>" ]
+ // CHECK-NEXT: ],
// CHECK-NEXT: "disequality_info": null,
// Keep the symbols and the constraints! alive.
diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
index f1b057be3abd..679ed3fda7a7 100644
--- a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
+++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
@@ -27,20 +27,17 @@ void test(int a, int b, int c, int d) {
if (b != 0)
return;
clang_analyzer_printState();
- // CHECK: "constraints": [
- // CHECK-NEXT: { "symbol": "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "(reg_$2<int c>) + (reg_$1<int b>)", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" },
- // CHECK-NEXT: { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" }
- // CHECK-NEXT: ],
- // CHECK-NEXT: "equivalence_classes": [
- // CHECK-NEXT: [ "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "((reg_$0<int a>) + (reg_$2<int c>)) != (reg_$3<int d>)", "(reg_$0<int a>) != (reg_$3<int d>)" ],
- // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>", "reg_$3<int d>" ],
- // CHECK-NEXT: [ "(reg_$2<int c>) + (reg_$1<int b>)", "reg_$2<int c>" ]
- // CHECK-NEXT: ],
- // CHECK-NEXT: "disequality_info": null,
+ // CHECK: "constraints": [
+ // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
+ // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" },
+ // CHECK-NEXT: { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" }
+ // CHECK-NEXT: ],
+ // CHECK-NEXT: "equivalence_classes": [
+ // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$3<int d>)" ],
+ // CHECK-NEXT: [ "reg_$0<int a>", "reg_$3<int d>" ],
+ // CHECK-NEXT: [ "reg_$2<int c>" ]
+ // CHECK-NEXT: ],
+ // CHECK-NEXT: "disequality_info": null,
// Keep the symbols and the constraints! alive.
(void)(a * b * c * d);
--
2.35.0.rc0.227.g00780c9af4-goog