nir: do not vectorize load/store if offset can overflow and robustness enabled This prevents vectorization for loads/stores that can overflow if the low offset is negative and the range greater or equal than 0. The caller can pass the list of variable modes that matter for robust access. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4881>

commit: 04718a9cd63cea9d815bffd91495069a79db8ac5 [log] [tgz]
author: Samuel Pitoiset <samuel.pitoiset@gmail.com> Mon May 04 16:02:38 2020 +0200
committer: Marge Bot <eric+marge@anholt.net> Mon May 11 07:25:15 2020 +0000
tree: 6ad0455776ada01b701e18cd8391fbef39141dc9
parent: 3fba0a7a6f01496344ddb93e774b2d4bc9195e8a [diff]
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 2864501..f1fde78 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp

@@ -998,7 +998,8 @@
                                     (nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
                                                         nir_var_mem_push_const | nir_var_mem_shared |
                                                         nir_var_mem_global),
-                                    mem_vectorize_callback)) {
+                                    mem_vectorize_callback,
+                                    (nir_variable_mode)0)) {
       lower_to_scalar = true;
       lower_pack = true;
    }

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 04080b5..25caa37 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h

@@ -4429,7 +4429,8 @@
                                               nir_intrinsic_instr *low, nir_intrinsic_instr *high);
 
 bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                                  nir_should_vectorize_mem_func callback);
+                                  nir_should_vectorize_mem_func callback,
+                                  nir_variable_mode robust_modes);
 
 void nir_schedule(nir_shader *shader, int threshold);
 

diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index c31c8d2..814c5a5 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c

@@ -187,6 +187,7 @@
 struct vectorize_ctx {
    nir_variable_mode modes;
    nir_should_vectorize_mem_func callback;
+   nir_variable_mode robust_modes;
    struct list_head entries[nir_num_variable_modes];
    struct hash_table *loads[nir_num_variable_modes];
    struct hash_table *stores[nir_num_variable_modes];
@@ -1054,6 +1055,22 @@
 }
 
 static bool
+check_for_robustness(struct vectorize_ctx *ctx, struct entry *low)
+{
+   nir_variable_mode mode = get_variable_mode(low);
+   if (mode & ctx->robust_modes) {
+      unsigned low_bit_size = get_bit_size(low);
+      unsigned low_size = low->intrin->num_components * low_bit_size;
+
+      /* don't attempt to vectorize accesses if the offset can overflow. */
+      /* TODO: handle indirect accesses. */
+      return low->offset_signed < 0 && low->offset_signed + low_size >= 0;
+   }
+
+   return false;
+}
+
+static bool
 is_strided_vector(const struct glsl_type *type)
 {
    if (glsl_type_is_vector(type)) {
@@ -1077,6 +1094,9 @@
    if (check_for_aliasing(ctx, first, second))
       return false;
 
+   if (check_for_robustness(ctx, low))
+      return false;
+
    /* we can only vectorize non-volatile loads/stores of the same type and with
     * the same access */
    if (first->info != second->info || first->access != second->access ||
@@ -1327,13 +1347,15 @@
 
 bool
 nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                             nir_should_vectorize_mem_func callback)
+                             nir_should_vectorize_mem_func callback,
+                             nir_variable_mode robust_modes)
 {
    bool progress = false;
 
    struct vectorize_ctx *ctx = rzalloc(NULL, struct vectorize_ctx);
    ctx->modes = modes;
    ctx->callback = callback;
+   ctx->robust_modes = robust_modes;
 
    nir_index_vars(shader, NULL, modes);
 

diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
index 710a2da..4001b8d 100644
--- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
+++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp

@@ -38,7 +38,8 @@
    nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                       unsigned index);
 
-   bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
+                       nir_variable_mode robust_modes = (nir_variable_mode)0);
 
    nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
 
@@ -134,11 +135,13 @@
 }
 
 bool
-nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
+                                              bool cse,
+                                              nir_variable_mode robust_modes)
 {
    if (modes & nir_var_mem_shared)
       nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
-   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
+   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
    if (progress) {
       nir_validate_shader(b->shader, NULL);
       if (cse)
@@ -1773,3 +1776,16 @@
 
    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
 }
+
+TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
+{
+   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
+   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index e59b3ac..7a1fea1 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c

@@ -881,7 +881,8 @@
       OPT(nir_opt_load_store_vectorize,
           nir_var_mem_ubo | nir_var_mem_ssbo |
           nir_var_mem_global | nir_var_mem_shared,
-          brw_nir_should_vectorize_mem);
+          brw_nir_should_vectorize_mem,
+          (nir_variable_mode)0);
    }
 
    OPT(brw_nir_lower_mem_access_bit_sizes, devinfo);
commit	04718a9cd63cea9d815bffd91495069a79db8ac5	[log] [tgz]
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>	Mon May 04 16:02:38 2020 +0200
committer	Marge Bot <eric+marge@anholt.net>	Mon May 11 07:25:15 2020 +0000
tree	6ad0455776ada01b701e18cd8391fbef39141dc9
parent	3fba0a7a6f01496344ddb93e774b2d4bc9195e8a [diff]