glsl: Walk a list of ir_dereference_array to mark array elements as accessed

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
diff --git a/src/compiler/glsl/ir_array_refcount.cpp b/src/compiler/glsl/ir_array_refcount.cpp
index 7627fa1..1a46b21 100644
--- a/src/compiler/glsl/ir_array_refcount.cpp
+++ b/src/compiler/glsl/ir_array_refcount.cpp
@@ -34,7 +34,7 @@
 #include "util/hash_table.h"
 
 ir_array_refcount_visitor::ir_array_refcount_visitor()
-   : derefs(0), num_derefs(0), derefs_size(0)
+   : last_array_deref(0), derefs(0), num_derefs(0), derefs_size(0)
 {
    this->mem_ctx = ralloc_context(NULL);
    this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
@@ -159,6 +159,77 @@
    return d;
 }
 
+ir_visitor_status
+ir_array_refcount_visitor::visit_enter(ir_dereference_array *ir)
+{
+   /* It could also be a vector or a matrix.  Individual elements of vectors
+    * are natrices are not tracked, so bail.
+    */
+   if (!ir->array->type->is_array())
+      return visit_continue;
+
+   /* If this array dereference is a child of an array dereference that was
+    * already visited, just continue on.  Otherwise, for an arrays-of-arrays
+    * dereference like x[1][2][3][4], we'd process the [1][2][3][4] sequence,
+    * the [1][2][3] sequence, the [1][2] sequence, and the [1] sequence.  This
+    * ensures that we only process the full sequence.
+    */
+   if (last_array_deref && last_array_deref->array == ir) {
+      last_array_deref = ir;
+      return visit_continue;
+   }
+
+   last_array_deref = ir;
+
+   num_derefs = 0;
+
+   ir_rvalue *rv = ir;
+   while (rv->ir_type == ir_type_dereference_array) {
+      ir_dereference_array *const deref = rv->as_dereference_array();
+
+      assert(deref != NULL);
+      assert(deref->array->type->is_array());
+
+      ir_rvalue *const array = deref->array;
+      const ir_constant *const idx = deref->array_index->as_constant();
+      array_deref_range *const dr = get_array_deref();
+
+      dr->size = array->type->array_size();
+
+      if (idx != NULL) {
+         dr->index = idx->get_int_component(0);
+      } else {
+         /* An unsized array can occur at the end of an SSBO.  We can't track
+          * accesses to such an array, so bail.
+          */
+         if (array->type->array_size() == 0)
+            return visit_continue;
+
+         dr->index = dr->size;
+      }
+
+      rv = array;
+   }
+
+   ir_dereference_variable *const var_deref = rv->as_dereference_variable();
+
+   /* If the array being dereferenced is not a variable, bail.  At the very
+    * least, ir_constant and ir_dereference_record are possible.
+    */
+   if (var_deref == NULL)
+      return visit_continue;
+
+   ir_array_refcount_entry *const entry =
+      this->get_variable_entry(var_deref->var);
+
+   if (entry == NULL)
+      return visit_stop;
+
+   entry->mark_array_elements_referenced(derefs, num_derefs);
+
+   return visit_continue;
+}
+
 
 ir_visitor_status
 ir_array_refcount_visitor::visit(ir_dereference_variable *ir)
diff --git a/src/compiler/glsl/ir_array_refcount.h b/src/compiler/glsl/ir_array_refcount.h
index 2988046..46ba36c 100644
--- a/src/compiler/glsl/ir_array_refcount.h
+++ b/src/compiler/glsl/ir_array_refcount.h
@@ -140,6 +140,7 @@
    virtual ir_visitor_status visit(ir_dereference_variable *);
 
    virtual ir_visitor_status visit_enter(ir_function_signature *);
+   virtual ir_visitor_status visit_enter(ir_dereference_array *);
 
    /**
     * Find variable in the hash table, and insert it if not present
@@ -158,6 +159,15 @@
    array_deref_range *get_array_deref();
 
    /**
+    * Last ir_dereference_array that was visited
+    *
+    * Used to prevent some redundant calculations.
+    *
+    * \sa ::visit_enter(ir_dereference_array *)
+    */
+   ir_dereference_array *last_array_deref;
+
+   /**
     * \name array_deref_range tracking
     */
    /*@{*/
diff --git a/src/compiler/glsl/tests/array_refcount_test.cpp b/src/compiler/glsl/tests/array_refcount_test.cpp
index 29bb133..ecd7f46 100644
--- a/src/compiler/glsl/tests/array_refcount_test.cpp
+++ b/src/compiler/glsl/tests/array_refcount_test.cpp
@@ -23,12 +23,18 @@
 #include <gtest/gtest.h>
 #include "ir.h"
 #include "ir_array_refcount.h"
+#include "ir_builder.h"
+#include "util/hash_table.h"
+
+using namespace ir_builder;
 
 class array_refcount_test : public ::testing::Test {
 public:
    virtual void SetUp();
    virtual void TearDown();
 
+   exec_list instructions;
+   ir_factory *body;
    void *mem_ctx;
 
    /**
@@ -40,6 +46,14 @@
    const glsl_type *array_3_of_array_4_of_array_5_of_vec4;
 
    /**
+    * glsl_type for a int[3].
+    *
+    * The exceptionally verbose name is picked because it matches the syntax
+    * of http://cdecl.org/.
+    */
+   const glsl_type *array_3_of_int;
+
+   /**
     * Wrapper to access private member "bits" of ir_array_refcount_entry
     *
     * The test class is a friend to ir_array_refcount_entry, but the
@@ -81,6 +95,9 @@
 {
    mem_ctx = ralloc_context(NULL);
 
+   instructions.make_empty();
+   body = new ir_factory(&instructions, mem_ctx);
+
    /* The type of vec4 x[3][4][5]; */
    const glsl_type *const array_5_of_vec4 =
       glsl_type::get_array_instance(glsl_type::vec4_type, 5);
@@ -88,15 +105,105 @@
       glsl_type::get_array_instance(array_5_of_vec4, 4);
    array_3_of_array_4_of_array_5_of_vec4 =
       glsl_type::get_array_instance(array_4_of_array_5_of_vec4, 3);
+
+   array_3_of_int = glsl_type::get_array_instance(glsl_type::int_type, 3);
 }
 
 void
 array_refcount_test::TearDown()
 {
+   delete body;
+   body = NULL;
+
    ralloc_free(mem_ctx);
    mem_ctx = NULL;
 }
 
+static operand
+deref_array(operand array, operand index)
+{
+   void *mem_ctx = ralloc_parent(array.val);
+
+   ir_rvalue *val = new(mem_ctx) ir_dereference_array(array.val, index.val);
+
+   return operand(val);
+}
+
+static operand
+deref_struct(operand s, const char *field)
+{
+   void *mem_ctx = ralloc_parent(s.val);
+
+   ir_rvalue *val = new(mem_ctx) ir_dereference_record(s.val, field);
+
+   return operand(val);
+}
+
+/**
+ * Verify that only the specified set of ir_variables exists in the hash table
+ */
+static void
+validate_variables_in_hash_table(struct hash_table *ht,
+                                 unsigned count,
+                                 ...)
+{
+   ir_variable **vars = new ir_variable *[count];
+   va_list args;
+
+   /* Make a copy of the list of expected ir_variables.  The copied list can
+    * be modified during the checking.
+    */
+   va_start(args, count);
+
+   for (unsigned i = 0; i < count; i++)
+      vars[i] = va_arg(args, ir_variable *);
+
+   va_end(args);
+
+   struct hash_entry *entry;
+   hash_table_foreach(ht, entry) {
+      const ir_instruction *const ir = (ir_instruction *) entry->key;
+      const ir_variable *const v = ir->as_variable();
+
+      if (v == NULL) {
+         ADD_FAILURE() << "Invalid junk in hash table: ir_type = "
+                       << ir->ir_type << ", address = "
+                       << (void *) ir;
+         continue;
+      }
+
+      unsigned i;
+      for (i = 0; i < count; i++) {
+         if (vars[i] == NULL)
+            continue;
+
+         if (vars[i] == v)
+            break;
+      }
+
+      if (i == count) {
+            ADD_FAILURE() << "Invalid variable in hash table: \""
+                          << v->name << "\"";
+      } else {
+         /* As each variable is encountered, remove it from the set.  Don't
+          * bother compacting the set because we don't care about
+          * performance here.
+          */
+         vars[i] = NULL;
+      }
+   }
+
+   /* Check that there's nothing left in the set. */
+   for (unsigned i = 0; i < count; i++) {
+      if (vars[i] != NULL) {
+         ADD_FAILURE() << "Variable was not in the hash table: \""
+                          << vars[i]->name << "\"";
+      }
+   }
+
+   delete [] vars;
+}
+
 TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_scalar)
 {
    ir_variable *const var =
@@ -290,3 +397,321 @@
       }
    }
 }
+
+TEST_F(array_refcount_test, do_not_process_vector_indexing)
+{
+   /* Vectors and matrices can also be indexed in much the same manner as
+    * arrays.  The visitor should not try to track per-element accesses to
+    * these types.
+    */
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::float_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "c",
+                                                 ir_var_auto);
+
+   body->emit(assign(var_a, deref_array(var_c, var_b)));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+   EXPECT_TRUE(entry_c->is_referenced);
+
+   /* As validated by previous tests, for non-array types, num_bits is 1. */
+   ASSERT_EQ(1, get_num_bits(*entry_c));
+   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, do_not_process_matrix_indexing)
+{
+   /* Vectors and matrices can also be indexed in much the same manner as
+    * arrays.  The visitor should not try to track per-element accesses to
+    * these types.
+    */
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(glsl_type::mat4_type,
+                                                 "c",
+                                                 ir_var_auto);
+
+   body->emit(assign(var_a, deref_array(var_c, var_b)));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+   EXPECT_TRUE(entry_c->is_referenced);
+
+   /* As validated by previous tests, for non-array types, num_bits is 1. */
+   ASSERT_EQ(1, get_num_bits(*entry_c));
+   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, do_not_process_array_inside_structure)
+{
+   /* Structures can contain arrays.  The visitor should not try to track
+    * per-element accesses to arrays contained inside structures.
+    */
+   const glsl_struct_field fields[] = {
+      glsl_struct_field(array_3_of_int, "i"),
+   };
+
+   const glsl_type *const record_of_array_3_of_int =
+      glsl_type::get_record_instance(fields, ARRAY_SIZE(fields), "S");
+
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "a",
+                                                 ir_var_auto);
+
+   ir_variable *var_b = new(mem_ctx) ir_variable(record_of_array_3_of_int,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* a = b.i[2] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_struct(var_b, "i"),
+                        body->constant(int(2)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+
+   ASSERT_EQ(1, get_num_bits(*entry_b));
+   EXPECT_FALSE(entry_b->is_linearized_index_referenced(0));
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}
+
+TEST_F(array_refcount_test, visit_simple_indexing)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* a = b[2][1][0] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           body->constant(int(1))),
+                        body->constant(int(0)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   const unsigned accessed_element = 0 + (1 * 5) + (2 * 4 * 5);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   const unsigned total_elements = var_b->type->arrays_of_arrays_size();
+
+   for (unsigned i = 0; i < total_elements; i++)
+      EXPECT_EQ(i == accessed_element, entry_b->is_linearized_index_referenced(i)) <<
+         "i = " << i;
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}
+
+TEST_F(array_refcount_test, visit_whole_second_array_indexing)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_i = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "i",
+                                                 ir_var_auto);
+
+   /* a = b[2][i][1] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           var_i),
+                        body->constant(int(1)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 2) && (k == 1);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "i = " << i;
+         }
+      }
+   }
+
+   validate_variables_in_hash_table(v.ht, 3, var_a, var_b, var_i);
+}
+
+TEST_F(array_refcount_test, visit_array_indexing_an_array)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(array_3_of_int,
+                                                 "c",
+                                                 ir_var_auto);
+   ir_variable *var_i = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "i",
+                                                 ir_var_auto);
+
+   /* a = b[2][3][c[i]] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           body->constant(int(3))),
+                        deref_array(var_c, var_i))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 2) && (j == 3);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "array b[" << i << "][" << j << "][" << k << "], " <<
+               "linear index = " << linearized_index;
+         }
+      }
+   }
+
+   ir_array_refcount_entry *const entry_c = v.get_variable_entry(var_c);
+
+   for (unsigned i = 0; i < var_c->type->array_size(); i++) {
+      EXPECT_EQ(true, entry_c->is_linearized_index_referenced(i)) <<
+         "array c, i = " << i;
+   }
+
+   validate_variables_in_hash_table(v.ht, 4, var_a, var_b, var_c, var_i);
+}
+
+TEST_F(array_refcount_test, visit_array_indexing_with_itself)
+{
+   const glsl_type *const array_2_of_array_3_of_int =
+      glsl_type::get_array_instance(array_3_of_int, 2);
+
+   const glsl_type *const array_2_of_array_2_of_array_3_of_int =
+      glsl_type::get_array_instance(array_2_of_array_3_of_int, 2);
+
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_2_of_array_2_of_array_3_of_int,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* Given GLSL code:
+    *
+    *    int b[2][2][3];
+    *    a = b[ b[0][0][0] ][ b[ b[0][1][0] ][ b[1][0][0] ][1] ][2]
+    *
+    * b[0][0][0], b[0][1][0], and b[1][0][0] are trivially accessed.
+    *
+    * b[*][*][1] and b[*][*][2] are accessed.
+    *
+    * Only b[1][1][0] is not accessed.
+    */
+   operand b000 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(0))),
+         body->constant(int(0))),
+      body->constant(int(0)));
+
+   operand b010 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(0))),
+         body->constant(int(1))),
+      body->constant(int(0)));
+
+   operand b100 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(1))),
+         body->constant(int(0))),
+      body->constant(int(0)));
+
+   operand b_b010_b100_1 = deref_array(
+      deref_array(
+         deref_array(var_b, b010),
+         b100),
+      body->constant(int(1)));
+
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, b000),
+                           b_b010_b100_1),
+                        body->constant(int(2)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+
+   for (unsigned i = 0; i < 2; i++) {
+      for (unsigned j = 0; j < 2; j++) {
+         for (unsigned k = 0; k < 3; k++) {
+            const bool accessed = !(i == 1 && j == 1 && k == 0);
+            const unsigned linearized_index = k + (j * 3) + (i * 2 * 3);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "array b[" << i << "][" << j << "][" << k << "], " <<
+               "linear index = " << linearized_index;
+         }
+      }
+   }
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}