/* src/panfrost/lib/pan_attributes.c - platform/external/mesa3d - Git at Google */

 /*
  * Copyright (C) 2019 Collabora, Ltd.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  */

 #include "util/u_math.h"
 #include "midgard_pack.h"
 #include "pan_encoder.h"

 /* This file handles attribute descriptors. The
  * bulk of the complexity is from instancing. See mali_job for
  * notes on how this works. But basically, for small vertex
  * counts, we have a lookup table, and for large vertex counts,
  * we look at the high bits as a heuristic. This has to match
  * exactly how the hardware calculates this (which is why the
  * algorithm is so weird) or else instancing will break. */

 /* Given an odd number (of the form 2k + 1), compute k. The argument is
  * parenthesised so that compound expressions such as ODD(a | b) expand
  * with the intended precedence. */
 #define ODD(odd) (((odd) - 1) >> 1)

 /* Pads a small vertex count. The only caller in this file passes values
  * below 20.
  *
  * The odd counts 11, 13, 15, 17 and 19 are bumped to the next even number;
  * every other value is returned unchanged. These are exactly the counts
  * below 20 that cannot be written as 2^k * {1, 3, 5, 7, 9}, which is the
  * only shape the large-count path in this file ever produces.
  *
  * NOTE(review): 17 was previously passed through unchanged; presumably the
  * hardware cannot encode that odd factor any more than 11/13/15/19 --
  * confirm against the hardware notes. */
 static unsigned
 panfrost_small_padded_vertex_count(unsigned idx)
 {
         switch (idx) {
         case 11:
         case 13:
         case 15:
         case 17:
         case 19:
                 return idx + 1;
         default:
                 return idx;
         }
 }

 /* Pads a large vertex count by inspecting its four most significant bits.
  *
  * The result is always 2^k * {1, 3, 5, 7, 9} and strictly greater than the
  * input.
  *
  * Preconditions: vertex_count must have at least four significant bits
  * (the caller in this file only passes counts >= 20); __builtin_clz is
  * undefined for 0. All arithmetic is unsigned so results that reach bit 31
  * are well defined. Counts whose padded value would need 33 bits cannot be
  * represented by the return type. */
 static unsigned
 panfrost_large_padded_vertex_count(uint32_t vertex_count)
 {
         /* 1-based position of the highest set bit */
         unsigned highest = 32 - __builtin_clz(vertex_count);

         /* n is the number of bits below the top nibble */
         unsigned n = highest - 4;
         unsigned nibble = (vertex_count >> n) & 0xF;

         /* The top bit of the nibble is known to be 1 and the bottom bit
          * only matters in one case, so dispatch on the middle two bits */
         unsigned middle_two = (nibble >> 1) & 0x3;

         switch (middle_two) {
         case 0: /* nibble is 1000 or 1001 */
                 if (!(nibble & 1))
                         return (1u << n) * 9;
                 else
                         return (1u << (n + 1)) * 5;
         case 1: /* nibble is 101x */
                 return (1u << (n + 2)) * 3;
         case 2: /* nibble is 110x */
                 return (1u << (n + 1)) * 7;
         case 3: /* nibble is 111x: round up to the next power of two */
                 return (1u << (n + 4));
         default:
                 return 0; /* unreachable */
         }
 }

 /* Returns the padded vertex count for vertex_count. Counts below 20 go
  * through the small-count helper; everything else goes through the
  * large-count helper that inspects the high bits. */
 unsigned
 panfrost_padded_vertex_count(unsigned vertex_count)
 {
         return (vertex_count >= 20) ?
                 panfrost_large_padded_vertex_count(vertex_count) :
                 panfrost_small_padded_vertex_count(vertex_count);
 }

 /* The much, much more irritating case -- instancing is enabled. See
  * panfrost_job.h for notes on how this works */

 /* Computes the "magic" multiplier used to divide by a non-power-of-two
  * divisor via multiply-and-shift.
  *
  * hw_divisor:  the divisor d; must be non-zero and not a power of two
  *              (the multiplier would not fit in 32 bits otherwise).
  * o_shift:     receives the value of util_logbase2(d).
  * extra_flags: set to 1 when the round-down variant is selected; left
  *              untouched otherwise, so callers must pre-initialise it.
  *
  * Returns the multiplier with its top bit (bit 31) stripped; that bit is
  * asserted to be set and is implicit in the encoding.
  *
  * Everything is computed in exact 64-bit integer arithmetic: a
  * floating-point quotient cannot hold the 32 integer bits plus the
  * fractional bits needed to round correctly for large divisors. */
 unsigned
 panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
 {
         /* We have a NPOT divisor. Here's the fun one (multiplying by
          * the inverse and shifting) */

         /* floor(log2(d)) */
         unsigned shift = util_logbase2(hw_divisor);

         /* t = 2^(32 + shift). Start from an unsigned 64-bit one so that
          * shift == 31 (bit 63) is well defined */
         uint64_t t = (uint64_t) 1 << (32 + shift);

         /* q = floor(t / d), e = t % d */
         uint64_t q = t / hw_divisor;
         uint64_t e = t % hw_divisor;

         /* m = ceil(t / d) */
         uint32_t m = (uint32_t) (q + (e != 0));

         /* Default case */
         uint32_t magic_divisor = m;

         /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
          * seems to use a different condition */
         if (e <= ((uint64_t) 1 << shift)) {
                 magic_divisor = m - 1;
                 *extra_flags = 1;
         }

         /* Top flag implicitly set */
         assert(magic_divisor & (1u << 31));
         magic_divisor &= ~(1u << 31);
         *o_shift = shift;

         return magic_divisor;
 }

 /* Records for gl_VertexID and gl_InstanceID use a slightly special encoding,
  * but the idea is the same */

 /* Fills *attr as the special gl_VertexID record for the given padded
  * vertex count. The count is split into a power-of-two part (stored in
  * shift) and the remaining high bits (stored in extra_flags); stride and
  * size are zeroed. */
 void
 panfrost_vertex_id(
         unsigned padded_count,
         union mali_attr *attr)
 {
         attr->elements = MALI_ATTR_VERTEXID;

         /* Number of trailing zero bits of the padded count */
         attr->shift = __builtin_ctz(padded_count);

         /* Drop the trailing zeros plus one more bit. The shift amount is
          * read back from the field that was just stored */
         const unsigned dropped_bits = attr->shift + 1;
         attr->extra_flags = padded_count >> dropped_bits;

         attr->size = 0;
         attr->stride = 0;
 }

 /* Fills *attr as the special gl_InstanceID record for the given padded
  * vertex count.
  *
  * When padded_count is a power of two (or zero), only shift is written,
  * as ctz(padded_count) - 1. Otherwise the magic divisor is stored in the
  * stride field (unused for these special records, per the original
  * notes), together with its shift and round-down flag. */
 void
 panfrost_instance_id(
         unsigned padded_count,
         union mali_attr *attr)
 {
         attr->elements = MALI_ATTR_INSTANCEID;
         attr->stride = 0;
         attr->extra_flags = 0;
         attr->size = 0;

         /* POT case: just a shift, with an off-by-one for unclear reasons */
         if (util_is_power_of_two_or_zero(padded_count)) {
                 attr->shift = __builtin_ctz(padded_count) - 1;
                 return;
         }

         /* NPOT case: divide via a magic multiplier. Both outputs start at
          * zero because the helper only writes the flag when it applies */
         unsigned divisor_shift = 0, round_down = 0;

         attr->stride = panfrost_compute_magic_divisor(padded_count, &divisor_shift, &round_down);
         attr->shift = divisor_shift;
         attr->extra_flags = round_down;
 }
	/*
	* Copyright (C) 2019 Collabora, Ltd.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*
	*/

	#include "util/u_math.h"
	#include "midgard_pack.h"
	#include "pan_encoder.h"

	/* This file handles attribute descriptors. The
	* bulk of the complexity is from instancing. See mali_job for
	* notes on how this works. But basically, for small vertex
	* counts, we have a lookup table, and for large vertex counts,
	* we look at the high bits as a heuristic. This has to match
	* exactly how the hardware calculates this (which is why the
	* algorithm is so weird) or else instancing will break. */

	/* Given an odd number (of the form 2k + 1), compute k. The argument is
	 * parenthesised so that compound expressions such as ODD(a | b) expand
	 * with the intended precedence. */
	#define ODD(odd) (((odd) - 1) >> 1)

	/* Pads a small vertex count. The only caller in this file passes values
	 * below 20.
	 *
	 * The odd counts 11, 13, 15, 17 and 19 are bumped to the next even number;
	 * every other value is returned unchanged. These are exactly the counts
	 * below 20 that cannot be written as 2^k * {1, 3, 5, 7, 9}, which is the
	 * only shape the large-count path in this file ever produces.
	 *
	 * NOTE(review): 17 was previously passed through unchanged; presumably the
	 * hardware cannot encode that odd factor any more than 11/13/15/19 --
	 * confirm against the hardware notes. */
	static unsigned
	panfrost_small_padded_vertex_count(unsigned idx)
	{
	        switch (idx) {
	        case 11:
	        case 13:
	        case 15:
	        case 17:
	        case 19:
	                return idx + 1;
	        default:
	                return idx;
	        }
	}

	/* Pads a large vertex count by inspecting its four most significant bits.
	 *
	 * The result is always 2^k * {1, 3, 5, 7, 9} and strictly greater than the
	 * input.
	 *
	 * Preconditions: vertex_count must have at least four significant bits
	 * (the caller in this file only passes counts >= 20); __builtin_clz is
	 * undefined for 0. All arithmetic is unsigned so results that reach bit 31
	 * are well defined. Counts whose padded value would need 33 bits cannot be
	 * represented by the return type. */
	static unsigned
	panfrost_large_padded_vertex_count(uint32_t vertex_count)
	{
	        /* 1-based position of the highest set bit */
	        unsigned highest = 32 - __builtin_clz(vertex_count);

	        /* n is the number of bits below the top nibble */
	        unsigned n = highest - 4;
	        unsigned nibble = (vertex_count >> n) & 0xF;

	        /* The top bit of the nibble is known to be 1 and the bottom bit
	         * only matters in one case, so dispatch on the middle two bits */
	        unsigned middle_two = (nibble >> 1) & 0x3;

	        switch (middle_two) {
	        case 0: /* nibble is 1000 or 1001 */
	                if (!(nibble & 1))
	                        return (1u << n) * 9;
	                else
	                        return (1u << (n + 1)) * 5;
	        case 1: /* nibble is 101x */
	                return (1u << (n + 2)) * 3;
	        case 2: /* nibble is 110x */
	                return (1u << (n + 1)) * 7;
	        case 3: /* nibble is 111x: round up to the next power of two */
	                return (1u << (n + 4));
	        default:
	                return 0; /* unreachable */
	        }
	}

	/* Returns the padded vertex count for vertex_count. Counts below 20 go
	 * through the small-count helper; everything else goes through the
	 * large-count helper that inspects the high bits. */
	unsigned
	panfrost_padded_vertex_count(unsigned vertex_count)
	{
	        return (vertex_count >= 20) ?
	                panfrost_large_padded_vertex_count(vertex_count) :
	                panfrost_small_padded_vertex_count(vertex_count);
	}

	/* The much, much more irritating case -- instancing is enabled. See
	* panfrost_job.h for notes on how this works */

	/* Computes the "magic" multiplier used to divide by a non-power-of-two
	 * divisor via multiply-and-shift.
	 *
	 * hw_divisor:  the divisor d; must be non-zero and not a power of two
	 *              (the multiplier would not fit in 32 bits otherwise).
	 * o_shift:     receives the value of util_logbase2(d).
	 * extra_flags: set to 1 when the round-down variant is selected; left
	 *              untouched otherwise, so callers must pre-initialise it.
	 *
	 * Both outputs are pointers: the body stores through them and the
	 * caller in this file passes addresses.
	 *
	 * Returns the multiplier with its top bit (bit 31) stripped; that bit is
	 * asserted to be set and is implicit in the encoding.
	 *
	 * Everything is computed in exact 64-bit integer arithmetic: a
	 * floating-point quotient cannot hold the 32 integer bits plus the
	 * fractional bits needed to round correctly for large divisors. */
	unsigned
	panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
	{
	        /* We have a NPOT divisor. Here's the fun one (multiplying by
	         * the inverse and shifting) */

	        /* floor(log2(d)) */
	        unsigned shift = util_logbase2(hw_divisor);

	        /* t = 2^(32 + shift). Start from an unsigned 64-bit one so that
	         * shift == 31 (bit 63) is well defined */
	        uint64_t t = (uint64_t) 1 << (32 + shift);

	        /* q = floor(t / d), e = t % d */
	        uint64_t q = t / hw_divisor;
	        uint64_t e = t % hw_divisor;

	        /* m = ceil(t / d) */
	        uint32_t m = (uint32_t) (q + (e != 0));

	        /* Default case */
	        uint32_t magic_divisor = m;

	        /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
	         * seems to use a different condition */
	        if (e <= ((uint64_t) 1 << shift)) {
	                magic_divisor = m - 1;
	                *extra_flags = 1;
	        }

	        /* Top flag implicitly set */
	        assert(magic_divisor & (1u << 31));
	        magic_divisor &= ~(1u << 31);
	        *o_shift = shift;

	        return magic_divisor;
	}

	/* Records for gl_VertexID and gl_InstanceID use a slightly special encoding,
	* but the idea is the same */

	/* Fills *attr as the special gl_VertexID record for the given padded
	 * vertex count. The count is split into a power-of-two part (stored in
	 * shift) and the remaining high bits (stored in extra_flags); stride and
	 * size are zeroed. */
	void
	panfrost_vertex_id(
	        unsigned padded_count,
	        union mali_attr *attr)
	{
	        attr->elements = MALI_ATTR_VERTEXID;

	        /* Number of trailing zero bits of the padded count */
	        attr->shift = __builtin_ctz(padded_count);

	        /* Drop the trailing zeros plus one more bit. The shift amount is
	         * read back from the field that was just stored */
	        const unsigned dropped_bits = attr->shift + 1;
	        attr->extra_flags = padded_count >> dropped_bits;

	        attr->size = 0;
	        attr->stride = 0;
	}

	/* Fills *attr as the special gl_InstanceID record for the given padded
	 * vertex count.
	 *
	 * When padded_count is a power of two (or zero), only shift is written,
	 * as ctz(padded_count) - 1. Otherwise the magic divisor is stored in the
	 * stride field (unused for these special records, per the original
	 * notes), together with its shift and round-down flag. */
	void
	panfrost_instance_id(
	        unsigned padded_count,
	        union mali_attr *attr)
	{
	        attr->elements = MALI_ATTR_INSTANCEID;
	        attr->stride = 0;
	        attr->extra_flags = 0;
	        attr->size = 0;

	        /* POT case: just a shift, with an off-by-one for unclear reasons */
	        if (util_is_power_of_two_or_zero(padded_count)) {
	                attr->shift = __builtin_ctz(padded_count) - 1;
	                return;
	        }

	        /* NPOT case: divide via a magic multiplier. Both outputs start at
	         * zero because the helper only writes the flag when it applies */
	        unsigned divisor_shift = 0, round_down = 0;

	        attr->stride = panfrost_compute_magic_divisor(padded_count, &divisor_shift, &round_down);
	        attr->shift = divisor_shift;
	        attr->extra_flags = round_down;
	}