src/gallium/drivers/softpipe/sp_compute.c - platform/external/mesa3d - Git at Google

 /*
  * Copyright 2016 Red Hat.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * on the rights to use, copy, modify, merge, publish, distribute, sub
  * license, and/or sell copies of the Software, and to permit persons to whom
  * the Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_pstipple.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
 #include "sp_context.h"
 #include "sp_screen.h"
 #include "sp_state.h"
 #include "sp_texture.h"
 #include "sp_tex_sample.h"
 #include "sp_tex_tile_cache.h"
 #include "tgsi/tgsi_parse.h"

 /**
  * Bind the compute shader to an interpreter machine and seed the system
  * values that are set once per machine.
  *
  * After binding, each of THREAD_ID, GRID_SIZE and BLOCK_SIZE that the machine
  * maps to a system-value slot (SysSemanticToIndex != -1) has its x/y/z
  * channels filled, with the same value replicated into every one of the
  * TGSI_QUAD_SIZE lanes.  The fourth channel is not written.
  *
  * \param cs             compute shader whose tokens are bound
  * \param machine        interpreter instance to set up
  * \param w,h,d          values stored for THREAD_ID
  * \param g_w,g_h,g_d    values stored for GRID_SIZE
  * \param b_w,b_h,b_d    values stored for BLOCK_SIZE
  * \param sampler,image,buffer  passed straight through to the bind call
  */
 static void
 cs_prepare(const struct sp_compute_shader *cs,
            struct tgsi_exec_machine *machine,
            int w, int h, int d,
            int g_w, int g_h, int g_d,
            int b_w, int b_h, int b_d,
            struct tgsi_sampler *sampler,
            struct tgsi_image *image,
            struct tgsi_buffer *buffer )
 {
    /* Semantic -> (x, y, z) triple, processed in this order. */
    const int semantics[3] = {
       TGSI_SEMANTIC_THREAD_ID,
       TGSI_SEMANTIC_GRID_SIZE,
       TGSI_SEMANTIC_BLOCK_SIZE,
    };
    const int values[3][3] = {
       { w,   h,   d   },
       { g_w, g_h, g_d },
       { b_w, b_h, b_d },
    };
    int s, lane, chan;

    /* The semantic lookups below are done after the shader is bound. */
    tgsi_exec_machine_bind_shader(machine, cs->tokens, sampler, image, buffer);

    for (s = 0; s < 3; s++) {
       unsigned slot;

       /* -1 means the machine has no slot for this semantic. */
       if (machine->SysSemanticToIndex[semantics[s]] == -1)
          continue;

       slot = machine->SysSemanticToIndex[semantics[s]];
       for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
          for (chan = 0; chan < 3; chan++)
             machine->SystemValue[slot].xyzw[chan].i[lane] = values[s][chan];
       }
    }
 }

 /**
  * Run (or resume) one thread's interpreter machine.
  *
  * On a fresh start the BLOCK_ID system value (if the machine has a slot for
  * it) is filled with g_w/g_h/g_d in every lane, NonHelperMask is reset and
  * execution begins at pc 0.  On a restart nothing is re-initialised and
  * execution continues from the machine's saved pc.
  *
  * \return true when the machine's pc is not -1 after the run (the caller
  *         treats this as "stopped early and must be resumed"), false
  *         otherwise.
  */
 static bool
 cs_run(const struct sp_compute_shader *cs,
        int g_w, int g_h, int g_d,
        struct tgsi_exec_machine *machine, bool restart)
 {
    if (restart) {
       /* Resume where the previous run left off. */
       tgsi_exec_machine_run(machine, machine->pc);
    } else {
       if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
          unsigned slot = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
          int lane;

          for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
             machine->SystemValue[slot].xyzw[0].i[lane] = g_w;
             machine->SystemValue[slot].xyzw[1].i[lane] = g_h;
             machine->SystemValue[slot].xyzw[2].i[lane] = g_d;
          }
       }

       /* (1 << 1) - 1 == 1: only bit 0 of the mask is set. */
       machine->NonHelperMask = (1 << 1) - 1;

       tgsi_exec_machine_run(machine, 0);
    }

    return machine->pc != -1;
 }

 /**
  * Execute one workgroup to completion.
  *
  * Every machine is run once from the start.  As long as at least one machine
  * reports (via cs_run() returning true) that it stopped before finishing,
  * all machines are run again in restart mode.  The loop ends after the first
  * pass in which no machine reports an early stop.
  *
  * \param g_w,g_h,g_d   workgroup coordinates forwarded to cs_run()
  * \param num_threads   number of entries in \p machines
  */
 static void
 run_workgroup(const struct sp_compute_shader *cs,
               int g_w, int g_h, int g_d, int num_threads,
               struct tgsi_exec_machine **machines)
 {
    bool resume = false;

    for (;;) {
       bool any_pending = false;
       int t;

       /* Every thread is always run; there is no short-circuiting. */
       for (t = 0; t < num_threads; t++) {
          if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
             any_pending = true;
       }

       if (!any_pending)
          break;

       resume = true;
    }
 }

 /**
  * Detach the compute shader from a machine.
  *
  * The machine is unbound (all bind arguments NULL) only when the tokens it
  * currently holds are this shader's tokens; otherwise it is left alone.
  */
 static void
 cs_delete(const struct sp_compute_shader *cs,
           struct tgsi_exec_machine *machine)
 {
    /* Some other token stream is bound: nothing of ours to release. */
    if (machine->Tokens != cs->tokens)
       return;

    tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
 }

 /**
  * Resolve the grid dimensions of a dispatch.
  *
  * Direct dispatch (info->indirect is NULL): the three values are copied from
  * info->grid.  Indirect dispatch: three consecutive uint32_t values are read
  * from the indirect buffer, starting at info->indirect_offset.
  *
  * If the indirect buffer cannot be mapped, \p grid_size is left unmodified,
  * so callers should pre-initialise it.
  *
  * \param context    context used to map/unmap the indirect buffer
  * \param info       dispatch description
  * \param grid_size  receives the x/y/z grid dimensions
  */
 static void
 fill_grid_size(struct pipe_context *context,
                const struct pipe_grid_info *info,
                uint32_t grid_size[3])
 {
    /*
     * Start from NULL: nothing visible here guarantees that a failed map
     * writes *transfer, and the failure check below must not read an
     * indeterminate pointer.
     */
    struct pipe_transfer *transfer = NULL;
    uint32_t *params;

    if (!info->indirect) {
       grid_size[0] = info->grid[0];
       grid_size[1] = info->grid[1];
       grid_size[2] = info->grid[2];
       return;
    }

    params = pipe_buffer_map_range(context, info->indirect,
                                   info->indirect_offset,
                                   3 * sizeof(uint32_t),
                                   PIPE_MAP_READ,
                                   &transfer);

    /* Treat a NULL mapping or a missing transfer object as failure. */
    if (!params || !transfer)
       return;

    grid_size[0] = params[0];
    grid_size[1] = params[1];
    grid_size[2] = params[2];
    pipe_buffer_unmap(context, transfer);
 }

 /**
  * Execute a compute dispatch with the TGSI interpreter.
  *
  * One interpreter machine is created per thread of a workgroup (block width
  * x height x depth taken from the shader's fixed block-size properties).
  * All machines share a single zero-initialised local memory buffer of
  * cs->shader.req_local_mem bytes.  The same set of machines is then reused
  * to run every workgroup of the grid in turn (x fastest, then y, then z).
  *
  * If any allocation fails (local memory, the machine array, or an individual
  * machine) the dispatch is abandoned: everything created so far is released
  * and no workgroup is run.
  *
  * \param context  the softpipe context
  * \param info     dispatch description (direct or indirect grid size)
  */
 void
 softpipe_launch_grid(struct pipe_context *context,
                      const struct pipe_grid_info *info)
 {
    struct softpipe_context *softpipe = softpipe_context(context);
    struct sp_compute_shader *cs = softpipe->cs;
    int num_threads_in_group;
    struct tgsi_exec_machine **machines;
    int bwidth, bheight, bdepth;
    int w, h, d, i;
    int g_w, g_h, g_d;
    uint32_t grid_size[3] = {0};
    void *local_mem = NULL;

    softpipe_update_compute_samplers(softpipe);
    bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
    bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
    bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
    num_threads_in_group = bwidth * bheight * bdepth;

    /* grid_size stays {0,0,0} if an indirect buffer cannot be read. */
    fill_grid_size(context, info, grid_size);

    if (cs->shader.req_local_mem) {
       local_mem = CALLOC(1, cs->shader.req_local_mem);
       /* Never hand the machines a NULL buffer with a non-zero size. */
       if (!local_mem)
          return;
    }

    /* Zero-filled, so slots that are never populated stay NULL. */
    machines = CALLOC(num_threads_in_group, sizeof(*machines));
    if (!machines) {
       FREE(local_mem);
       return;
    }

    /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
    for (d = 0; d < bdepth; d++) {
       for (h = 0; h < bheight; h++) {
          for (w = 0; w < bwidth; w++) {
             int idx = w + (h * bwidth) + (d * bheight * bwidth);

             machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
             if (!machines[idx])
                goto cleanup;

             machines[idx]->LocalMem = local_mem;
             machines[idx]->LocalMemSize = cs->shader.req_local_mem;
             cs_prepare(cs, machines[idx],
                        w, h, d,
                        grid_size[0], grid_size[1], grid_size[2],
                        bwidth, bheight, bdepth,
                        (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
                        (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
                        (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
             tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
                                            softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
                                            softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
          }
       }
    }

    /* Workgroups run one after another on the same set of machines. */
    for (g_d = 0; g_d < grid_size[2]; g_d++) {
       for (g_h = 0; g_h < grid_size[1]; g_h++) {
          for (g_w = 0; g_w < grid_size[0]; g_w++) {
             run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
          }
       }
    }

 cleanup:
    for (i = 0; i < num_threads_in_group; i++) {
       /* Slots after a failed creation were never filled. */
       if (!machines[i])
          continue;
       cs_delete(cs, machines[i]);
       tgsi_exec_machine_destroy(machines[i]);
    }

    FREE(local_mem);
    FREE(machines);
 }
	/*
	* Copyright 2016 Red Hat.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* on the rights to use, copy, modify, merge, publish, distribute, sub
	* license, and/or sell copies of the Software, and to permit persons to whom
	* the Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
	* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
	* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
	* USE OR OTHER DEALINGS IN THE SOFTWARE.
	*/
	#include "util/u_inlines.h"
	#include "util/u_math.h"
	#include "util/u_memory.h"
	#include "util/u_pstipple.h"
	#include "pipe/p_shader_tokens.h"
	#include "draw/draw_context.h"
	#include "draw/draw_vertex.h"
	#include "sp_context.h"
	#include "sp_screen.h"
	#include "sp_state.h"
	#include "sp_texture.h"
	#include "sp_tex_sample.h"
	#include "sp_tex_tile_cache.h"
	#include "tgsi/tgsi_parse.h"

	/**
	 * Attach the compute shader to a machine and write the system values
	 * that are set once per machine.
	 *
	 * For each of THREAD_ID, GRID_SIZE and BLOCK_SIZE, when the machine has
	 * a system-value slot for it (index != -1), channels 0..2 of that slot
	 * are filled with the corresponding triple in all TGSI_QUAD_SIZE lanes.
	 */
	static void
	cs_prepare(const struct sp_compute_shader *cs,
	           struct tgsi_exec_machine *machine,
	           int w, int h, int d,
	           int g_w, int g_h, int g_d,
	           int b_w, int b_h, int b_d,
	           struct tgsi_sampler *sampler,
	           struct tgsi_image *image,
	           struct tgsi_buffer *buffer )
	{
	   int lane;

	   /* Bind first; the semantic lookups below are done on the bound machine. */
	   tgsi_exec_machine_bind_shader(machine, cs->tokens,
	                                 sampler, image, buffer);

	   /* THREAD_ID <- (w, h, d) */
	   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
	      unsigned tid_slot = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];

	      for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
	         machine->SystemValue[tid_slot].xyzw[0].i[lane] = w;
	         machine->SystemValue[tid_slot].xyzw[1].i[lane] = h;
	         machine->SystemValue[tid_slot].xyzw[2].i[lane] = d;
	      }
	   }

	   /* GRID_SIZE <- (g_w, g_h, g_d) */
	   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
	      unsigned grid_slot = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];

	      for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
	         machine->SystemValue[grid_slot].xyzw[0].i[lane] = g_w;
	         machine->SystemValue[grid_slot].xyzw[1].i[lane] = g_h;
	         machine->SystemValue[grid_slot].xyzw[2].i[lane] = g_d;
	      }
	   }

	   /* BLOCK_SIZE <- (b_w, b_h, b_d) */
	   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
	      unsigned block_slot = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];

	      for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
	         machine->SystemValue[block_slot].xyzw[0].i[lane] = b_w;
	         machine->SystemValue[block_slot].xyzw[1].i[lane] = b_h;
	         machine->SystemValue[block_slot].xyzw[2].i[lane] = b_d;
	      }
	   }
	}

	/**
	 * Start or resume execution of a single machine.
	 *
	 * When \p restart is false, BLOCK_ID (if the machine has a slot for it)
	 * is loaded with g_w/g_h/g_d in every lane, NonHelperMask is reset, and
	 * the machine runs from pc 0.  When \p restart is true the machine simply
	 * continues from its stored pc.
	 *
	 * \return whether the machine's pc differs from -1 once the run returns.
	 */
	static bool
	cs_run(const struct sp_compute_shader *cs,
	       int g_w, int g_h, int g_d,
	       struct tgsi_exec_machine *machine, bool restart)
	{
	   bool stopped_early;

	   if (restart) {
	      tgsi_exec_machine_run(machine, machine->pc);
	   } else {
	      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
	         unsigned bid_slot = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
	         int lane;

	         for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
	            machine->SystemValue[bid_slot].xyzw[0].i[lane] = g_w;
	            machine->SystemValue[bid_slot].xyzw[1].i[lane] = g_h;
	            machine->SystemValue[bid_slot].xyzw[2].i[lane] = g_d;
	         }
	      }

	      /* Evaluates to 1, i.e. only the lowest mask bit is set. */
	      machine->NonHelperMask = (1 << 1) - 1;

	      tgsi_exec_machine_run(machine, 0);
	   }

	   stopped_early = (machine->pc != -1);
	   return stopped_early;
	}

	/**
	 * Run all threads of one workgroup until none of them needs resuming.
	 *
	 * The first pass starts every machine from the beginning.  Whenever any
	 * machine reports an early stop (cs_run() returns true), another pass is
	 * made with every machine in restart mode.
	 *
	 * \param g_w,g_h,g_d   workgroup coordinates forwarded to cs_run()
	 * \param num_threads   number of entries in \p machines
	 */
	static void
	run_workgroup(const struct sp_compute_shader *cs,
	              int g_w, int g_h, int g_d, int num_threads,
	              struct tgsi_exec_machine **machines)
	{
	   int i;
	   bool grp_hit_barrier, restart_threads = false;

	   do {
	      grp_hit_barrier = false;
	      for (i = 0; i < num_threads; i++) {
	         /* Non-short-circuit OR: every thread runs on every pass. */
	         grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);
	      }
	      /* Another pass is needed only if some thread stopped early. */
	      restart_threads = grp_hit_barrier;
	   } while (restart_threads);
	}

	/**
	 * Unbind this shader from a machine, but only if the machine's current
	 * tokens are this shader's tokens.
	 */
	static void
	cs_delete(const struct sp_compute_shader *cs,
	          struct tgsi_exec_machine *machine)
	{
	   const bool bound_to_us = (machine->Tokens == cs->tokens);

	   if (!bound_to_us)
	      return;

	   /* Binding all-NULL detaches the shader from the machine. */
	   tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
	}

	/**
	 * Determine the x/y/z grid dimensions of a dispatch.
	 *
	 * Without an indirect buffer the values come from info->grid.  With one,
	 * three uint32_t values are read from the buffer at
	 * info->indirect_offset.  On a mapping failure \p grid_size is left as
	 * the caller passed it in.
	 *
	 * \param context    context used to map/unmap the indirect buffer
	 * \param info       dispatch description
	 * \param grid_size  receives the grid dimensions
	 */
	static void
	fill_grid_size(struct pipe_context *context,
	               const struct pipe_grid_info *info,
	               uint32_t grid_size[3])
	{
	   /*
	    * Initialised so the failure check below never reads an indeterminate
	    * pointer if the map call bails out without writing *transfer.
	    */
	   struct pipe_transfer *transfer = NULL;
	   uint32_t *params;

	   if (!info->indirect) {
	      grid_size[0] = info->grid[0];
	      grid_size[1] = info->grid[1];
	      grid_size[2] = info->grid[2];
	      return;
	   }

	   params = pipe_buffer_map_range(context, info->indirect,
	                                  info->indirect_offset,
	                                  3 * sizeof(uint32_t),
	                                  PIPE_MAP_READ,
	                                  &transfer);

	   /* A NULL mapping or a missing transfer object both mean failure. */
	   if (!params || !transfer)
	      return;

	   grid_size[0] = params[0];
	   grid_size[1] = params[1];
	   grid_size[2] = params[2];
	   pipe_buffer_unmap(context, transfer);
	}

	/**
	 * Execute a compute dispatch with the TGSI interpreter.
	 *
	 * Creates one machine per thread of a workgroup (dimensions come from the
	 * shader's fixed block-size properties), all sharing one zero-initialised
	 * local memory buffer, then runs every workgroup of the grid in turn on
	 * that same set of machines.
	 *
	 * Any allocation failure aborts the dispatch: whatever was created so far
	 * is released and no workgroup is run.
	 *
	 * \param context  the softpipe context
	 * \param info     dispatch description (direct or indirect grid size)
	 */
	void
	softpipe_launch_grid(struct pipe_context *context,
	                     const struct pipe_grid_info *info)
	{
	   struct softpipe_context *softpipe = softpipe_context(context);
	   struct sp_compute_shader *cs = softpipe->cs;
	   int num_threads_in_group;
	   struct tgsi_exec_machine **machines;
	   int bwidth, bheight, bdepth;
	   int w, h, d, i;
	   int g_w, g_h, g_d;
	   uint32_t grid_size[3] = {0};
	   void *local_mem = NULL;

	   softpipe_update_compute_samplers(softpipe);
	   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
	   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
	   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
	   num_threads_in_group = bwidth * bheight * bdepth;

	   /* grid_size stays all-zero if an indirect buffer cannot be read. */
	   fill_grid_size(context, info, grid_size);

	   if (cs->shader.req_local_mem) {
	      local_mem = CALLOC(1, cs->shader.req_local_mem);
	      /* Do not run with a NULL buffer and a non-zero advertised size. */
	      if (!local_mem)
	         return;
	   }

	   /* Zero-filled, so unpopulated slots are NULL for the cleanup loop. */
	   machines = CALLOC(num_threads_in_group, sizeof(*machines));
	   if (!machines) {
	      FREE(local_mem);
	      return;
	   }

	   /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
	   for (d = 0; d < bdepth; d++) {
	      for (h = 0; h < bheight; h++) {
	         for (w = 0; w < bwidth; w++) {
	            int idx = w + (h * bwidth) + (d * bheight * bwidth);

	            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
	            if (!machines[idx])
	               goto cleanup;

	            machines[idx]->LocalMem = local_mem;
	            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
	            cs_prepare(cs, machines[idx],
	                       w, h, d,
	                       grid_size[0], grid_size[1], grid_size[2],
	                       bwidth, bheight, bdepth,
	                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
	                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
	                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
	            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
	                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
	                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
	         }
	      }
	   }

	   /* Workgroups are executed sequentially: x fastest, then y, then z. */
	   for (g_d = 0; g_d < grid_size[2]; g_d++) {
	      for (g_h = 0; g_h < grid_size[1]; g_h++) {
	         for (g_w = 0; g_w < grid_size[0]; g_w++) {
	            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
	         }
	      }
	   }

	cleanup:
	   for (i = 0; i < num_threads_in_group; i++) {
	      /* Slots after a failed creation were never filled. */
	      if (!machines[i])
	         continue;
	      cs_delete(cs, machines[i]);
	      tgsi_exec_machine_destroy(machines[i]);
	   }

	   FREE(local_mem);
	   FREE(machines);
	}