[analyzer] When invalidating symbolic offset regions, take fields into account.

Previously, RegionStore was being VERY conservative in saying that because
p[i].x and p[i].y have a concrete base region of 'p', they might overlap.
Now, we check the chain of fields back up to the base object and check if
they match.

This only kicks in when dealing with symbolic offset regions because
RegionStore's "base+offset" representation of concrete offset regions loses
all information about fields. In cases where all offsets are concrete
(s.x and s.y), RegionStore will already do the right thing, but mixing
concrete and symbolic offsets can cause bindings to be invalidated that
are known to not overlap (e.g. p[0].x and p[i].y).
This additional refinement is tracked by <rdar://problem/12676180>.

<rdar://problem/12530149>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@167654 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp
index 5b8f65d..aed994d 100644
--- a/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -522,6 +522,46 @@
   return true;
 }
 
+static inline bool isUnionField(const FieldRegion *FR) {
+  return FR->getDecl()->getParent()->isUnion();
+}
+
+typedef SmallVector<const FieldDecl *, 8> FieldVector;
+
+void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) {
+  assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
+
+  const MemRegion *Base = K.getConcreteOffsetRegion();
+  const MemRegion *R = K.getRegion();
+
+  while (R != Base) {
+    if (const FieldRegion *FR = dyn_cast<FieldRegion>(R))
+      if (!isUnionField(FR))
+        Fields.push_back(FR->getDecl());
+
+    R = cast<SubRegion>(R)->getSuperRegion();
+  }
+}
+
+static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) {
+  assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
+
+  if (Fields.empty())
+    return true;
+
+  FieldVector FieldsInBindingKey;
+  getSymbolicOffsetFields(K, FieldsInBindingKey);
+
+  ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size();
+  if (Delta >= 0)
+    return std::equal(FieldsInBindingKey.begin() + Delta,
+                      FieldsInBindingKey.end(),
+                      Fields.begin());
+  else
+    return std::equal(FieldsInBindingKey.begin(), FieldsInBindingKey.end(),
+                      Fields.begin() - Delta);
+}
+
 RegionBindings RegionStoreManager::removeSubRegionBindings(RegionBindings B,
                                                            const SubRegion *R) {
   BindingKey SRKey = BindingKey::Make(R, BindingKey::Default);
@@ -531,10 +571,12 @@
     return RBFactory.remove(B, R);
   }
 
-  if (SRKey.hasSymbolicOffset()) {
-    const SubRegion *Base = cast<SubRegion>(SRKey.getConcreteOffsetRegion());
-    B = removeSubRegionBindings(B, Base);
-    return addBinding(B, Base, BindingKey::Default, UnknownVal());
+  FieldVector FieldsInSymbolicSubregions;
+  bool HasSymbolicOffset = SRKey.hasSymbolicOffset();
+  if (HasSymbolicOffset) {
+    getSymbolicOffsetFields(SRKey, FieldsInSymbolicSubregions);
+    R = cast<SubRegion>(SRKey.getConcreteOffsetRegion());
+    SRKey = BindingKey::Make(R, BindingKey::Default);
   }
 
   // This assumes the region being invalidated is char-aligned. This isn't
@@ -562,11 +604,17 @@
        I != E; ++I) {
     BindingKey NextKey = I.getKey();
     if (NextKey.getRegion() == SRKey.getRegion()) {
+      // FIXME: This doesn't catch the case where we're really invalidating a
+      // region with a symbolic offset. Example:
+      //      R: points[i].y
+      //   Next: points[0].x
+
       if (NextKey.getOffset() > SRKey.getOffset() &&
           NextKey.getOffset() - SRKey.getOffset() < Length) {
         // Case 1: The next binding is inside the region we're invalidating.
         // Remove it.
         Result = CBFactory.remove(Result, NextKey);
+
       } else if (NextKey.getOffset() == SRKey.getOffset()) {
         // Case 2: The next binding is at the same offset as the region we're
         // invalidating. In this case, we need to leave default bindings alone,
@@ -577,6 +625,7 @@
         if (NextKey.isDirect())
           Result = CBFactory.remove(Result, NextKey);
       }
+      
     } else if (NextKey.hasSymbolicOffset()) {
       const MemRegion *Base = NextKey.getConcreteOffsetRegion();
       if (R->isSubRegionOf(Base)) {
@@ -584,16 +633,24 @@
         // its concrete region. We don't know if the binding is still valid, so
         // we'll be conservative and remove it.
         if (NextKey.isDirect())
-          Result = CBFactory.remove(Result, NextKey);
+          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
+            Result = CBFactory.remove(Result, NextKey);
       } else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
         // Case 4: The next key is symbolic, but we changed a known
         // super-region. In this case the binding is certainly no longer valid.
         if (R == Base || BaseSR->isSubRegionOf(R))
-          Result = CBFactory.remove(Result, NextKey);
+          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
+            Result = CBFactory.remove(Result, NextKey);
       }
     }
   }
 
+  // If we're invalidating a region with a symbolic offset, we need to make sure
+  // we don't treat the base region as uninitialized anymore.
+  // FIXME: This isn't very precise; see the example in the loop.
+  if (HasSymbolicOffset)
+    Result = CBFactory.add(Result, SRKey, UnknownVal());
+
   if (Result.isEmpty())
     return RBFactory.remove(B, ClusterHead);
   return RBFactory.add(B, ClusterHead, Result);
diff --git a/test/Analysis/array-struct-region.c b/test/Analysis/array-struct-region.c
index e8b1fb3..d628c47 100644
--- a/test/Analysis/array-struct-region.c
+++ b/test/Analysis/array-struct-region.c
@@ -183,3 +183,110 @@
 }
 
 
+int testNonOverlappingStructFieldsSimple() {
+  S val;
+
+  val.x = 1;
+  val.y = 2;
+  clang_analyzer_eval(val.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(val.y == 2); // expected-warning{{TRUE}}
+
+  return val.z; // expected-warning{{garbage}}
+}
+
+int testNonOverlappingStructFieldsSymbolicBase(int index, int anotherIndex) {
+  SS vals;
+
+  vals.a[index].x = 42;
+  vals.a[index].y = 42;
+  clang_analyzer_eval(vals.a[index].x == 42); // expected-warning{{TRUE}}
+  clang_analyzer_eval(vals.a[index].y == 42); // expected-warning{{TRUE}}
+
+  vals.a[anotherIndex].x = 42;
+  clang_analyzer_eval(vals.a[index].x == 42); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(vals.a[index].y == 42); // expected-warning{{TRUE}}
+
+  // FIXME: False negative. No bind ever set a field 'z'.
+  return vals.a[index].z; // no-warning
+}
+
+int testStructFieldChains(int index, int anotherIndex) {
+  SS vals[4];
+
+  vals[index].a[0].x = 42;
+  vals[anotherIndex].a[1].y = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+  clang_analyzer_eval(vals[anotherIndex].a[1].y == 42); // expected-warning{{TRUE}}
+
+  // This doesn't affect anything in the 'a' array field.
+  vals[anotherIndex].b[1].x = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+  clang_analyzer_eval(vals[anotherIndex].a[1].y == 42); // expected-warning{{TRUE}}
+  clang_analyzer_eval(vals[anotherIndex].b[1].x == 42); // expected-warning{{TRUE}}
+
+  // This doesn't affect anything in the 'b' array field.
+  vals[index].a[anotherIndex].x = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(vals[anotherIndex].a[0].x == 42); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(vals[anotherIndex].a[1].y == 42); // expected-warning{{TRUE}}
+  clang_analyzer_eval(vals[anotherIndex].b[1].x == 42); // expected-warning{{TRUE}}
+
+  // FIXME: False negative. No bind ever set a field 'z'.
+  return vals[index].a[0].z; // no-warning
+}
+
+int testStructFieldChainsNested(int index, int anotherIndex) {
+  SS vals[4];
+
+  vals[index].a[0].x = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+
+  vals[index].b[0] = makeS();
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+
+  vals[index].a[0] = makeS();
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{UNKNOWN}}
+
+  vals[index].a[0].x = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+
+  return 0;
+}
+
+
+// --------------------
+// False positives
+// --------------------
+
+int testMixSymbolicAndConcrete(int index, int anotherIndex) {
+  SS vals;
+
+  vals.a[index].x = 42;
+  vals.a[0].y = 42;
+
+  // FIXME: Should be TRUE.
+  clang_analyzer_eval(vals.a[index].x == 42); // expected-warning{{UNKNOWN}}
+  // Should be TRUE; we set this explicitly.
+  clang_analyzer_eval(vals.a[0].y == 42); // expected-warning{{TRUE}}
+
+  vals.a[anotherIndex].y = 42;
+
+  // Should be UNKNOWN; we set an 'x'.
+  clang_analyzer_eval(vals.a[index].x == 42); // expected-warning{{UNKNOWN}}
+  // FIXME: Should be TRUE.
+  clang_analyzer_eval(vals.a[0].y == 42); // expected-warning{{UNKNOWN}}
+
+  return vals.a[0].x; // no-warning
+}
+
+void testFieldChainIsNotEnough(int index) {
+  SS vals[4];
+
+  vals[index].a[0].x = 42;
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{TRUE}}
+
+  vals[index].a[1] = makeS();
+  // FIXME: Should be TRUE.
+  clang_analyzer_eval(vals[index].a[0].x == 42); // expected-warning{{UNKNOWN}}
+}
+