gallium: Use unpack_rgba() instead of fetch_rgba() in translate_generic

This is the only user of fetch_rgba() outside of llvmpipe, and it's in the fallback path of this fallback path.

Looking at an example of these two functions, b8g8r8a8's unpack_rgba() is 2.7x as long as fetch_rgba(). It feels reasonable to sacrifice some performance in this already slow path (VBO readback, and a function pointer call per attribute per vertex) to reduce our binary size. And, if I ever finish getting unpack codegen to switch to rows instead of rects, that factor will go back down.

Saves 40 KB of binary on non-llvmpipe gallium drivers.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6305>

commit: 09196355b2b2b6a2d3935eb3c43fe784d226426a [log] [tgz]
author: Eric Anholt <eric@anholt.net> Thu Aug 13 10:14:11 2020 -0700
committer: Marge Bot <eric+marge@anholt.net> Sun Aug 16 21:25:14 2020 +0000
tree: 6091b9e4c083b9d5ccea242f815bb207c282c60e
parent: 5b8d67cb64ca38c93089da2f0b414c5897a19e27 [diff]
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 14631da..d48c8f9 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c

@@ -50,7 +50,9 @@
    struct {
       enum translate_element_type type;
 
-      util_format_fetch_rgba_func_ptr fetch;
+      void (*fetch)(void *dst, unsigned dst_stride,
+                    const uint8_t *src, unsigned src_stride,
+                    unsigned width, unsigned height);
       unsigned buffer;
       unsigned input_offset;
       unsigned instance_divisor;
@@ -623,7 +625,7 @@
          if (likely(copy_size >= 0)) {
             memcpy(dst, src, copy_size);
          } else {
-            tg->attrib[attr].fetch(data, src, 0, 0);
+            tg->attrib[attr].fetch(data, 0, src, 0, 1, 1);
 
             if (0)
                debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
@@ -796,6 +798,8 @@
    for (i = 0; i < key->nr_elements; i++) {
       const struct util_format_description *format_desc =
             util_format_description(key->element[i].input_format);
+      const struct util_format_unpack_description *unpack =
+         util_format_unpack_description(key->element[i].input_format);
 
       assert(format_desc);
 
@@ -811,8 +815,7 @@
          }
       }
 
-      tg->attrib[i].fetch =
-         util_format_fetch_rgba_func(key->element[i].input_format);
+      tg->attrib[i].fetch = unpack->unpack_rgba;
       tg->attrib[i].buffer = key->element[i].input_buffer;
       tg->attrib[i].input_offset = key->element[i].input_offset;
       tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
commit	09196355b2b2b6a2d3935eb3c43fe784d226426a	[log] [tgz]
author	Eric Anholt <eric@anholt.net>	Thu Aug 13 10:14:11 2020 -0700
committer	Marge Bot <eric+marge@anholt.net>	Sun Aug 16 21:25:14 2020 +0000
tree	6091b9e4c083b9d5ccea242f815bb207c282c60e
parent	5b8d67cb64ca38c93089da2f0b414c5897a19e27 [diff]