/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_int64.cpp
*
* Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
* to a uvec2. For each operation that can be lowered, there is a function
* called __builtin_foo with the same number of parameters that takes uvec2
* sources and produces uvec2 results. An operation like
*
* uint64_t(x) * uint64_t(y)
*
* becomes
*
* packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
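*
* Vector operands are handled one component at a time: each component is
* unpacked to a uvec2, the scalar builtin is called once per component, and
* the per-component results are packed back into a vector of the original
* 64-bit type (see expand_source() and compact_destination() below).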
*/
#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
builtin_available_predicate avail);
using namespace ir_builder;
namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
ir_dereference_variable *compact_destination(ir_factory &,
const glsl_type *type,
ir_variable *result[4]);
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
ir_function_signature *callee);
};
using namespace lower_64bit;
namespace {
class lower_64bit_visitor : public ir_rvalue_visitor {
public:
lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
: progress(false), lower(lower),
function_list(), added_functions(&function_list, mem_ctx)
{
functions = _mesa_hash_table_create(mem_ctx,
_mesa_hash_string,
_mesa_key_string_equal);
foreach_in_list(ir_instruction, node, instructions) {
ir_function *const f = node->as_function();
if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
continue;
add_function(f);
}
}
~lower_64bit_visitor()
{
_mesa_hash_table_destroy(functions, NULL);
}
void handle_rvalue(ir_rvalue **rvalue);
void add_function(ir_function *f)
{
_mesa_hash_table_insert(functions, f->name, f);
}
ir_function *find_function(const char *name)
{
struct hash_entry *const entry =
_mesa_hash_table_search(functions, name);
return entry != NULL ? (ir_function *) entry->data : NULL;
}
bool progress;
private:
unsigned lower; /** Bitfield of which operations to lower */
/** Hashtable containing all of the known functions in the IR */
struct hash_table *functions;
public:
exec_list function_list;
private:
ir_factory added_functions;
ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
function_generator generator);
};
} /* anonymous namespace */
/**
* Determine if a particular type of lowering should occur
*/
#define lowering(x) (this->lower & x)
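/**
* Main entry point for 64-bit integer lowering
*
* \param instructions   Top-level IR instruction stream to process
* \param what_to_lower  Bitfield selecting which operations to lower
*                       (the MUL64, DIV64, MOD64 and SIGN64 flags tested by
*                       handle_rvalue() below)
*
* Any __builtin_* helper functions generated while lowering are collected in
* the visitor's function_list and spliced onto the head of \p instructions.
* Returns true if any expression was rewritten.
*
* A caller would use it roughly like this (sketch; the flag combination is
* illustrative):
*
*    progress = lower_64bit_integer_instructions(ir, DIV64 | MOD64);
*/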
bool
lower_64bit_integer_instructions(exec_list *instructions,
unsigned what_to_lower)
{
if (instructions->is_empty())
return false;
ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
void *const mem_ctx = ralloc_parent(first_inst);
lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);
visit_list_elements(&v, instructions);
if (v.progress && !v.function_list.is_empty()) {
/* Move all of the nodes from function_list to the head of the incoming
* instruction list.
*/
exec_node *const after = &instructions->head_sentinel;
exec_node *const before = instructions->head_sentinel.next;
exec_node *const head = v.function_list.head_sentinel.next;
exec_node *const tail = v.function_list.tail_sentinel.prev;
before->next = head;
head->prev = before;
after->prev = tail;
tail->next = after;
}
return v.progress;
}
/**
* Expand individual 64-bit values to uvec2 values
*
* Each operation is in one of a few forms.
*
* vector op vector
* vector op scalar
* scalar op vector
* scalar op scalar
*
* In the 'vector op vector' case, the two vectors must have the same size.
* In a way, the 'scalar op scalar' form is a special case of the 'vector op
* vector' form.
*
* This function generates a new set of uvec2 values for each element of a
* single operand. If the operand is a scalar, the uvec2 is replicated
* multiple times. A value like
*
* u64vec3(a) + u64vec3(b)
*
* becomes
*
* u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
* uvec2 tmp1 = unpackUint2x32(tmp0.x);
* uvec2 tmp2 = unpackUint2x32(tmp0.y);
* uvec2 tmp3 = unpackUint2x32(tmp0.z);
*
* and the returned operands array contains ir_variable pointers to
*
* { tmp1, tmp2, tmp3, tmp1 }
*/
void
lower_64bit::expand_source(ir_factory &body,
ir_rvalue *val,
ir_variable **expanded_src)
{
assert(val->type->is_integer_64());
ir_variable *const temp = body.make_temp(val->type, "tmp");
body.emit(assign(temp, val));
const ir_expression_operation unpack_opcode =
val->type->base_type == GLSL_TYPE_UINT64
? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
const glsl_type *const type =
val->type->base_type == GLSL_TYPE_UINT64
? glsl_type::uvec2_type : glsl_type::ivec2_type;
unsigned i;
for (i = 0; i < val->type->vector_elements; i++) {
expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
body.emit(assign(expanded_src[i],
expr(unpack_opcode, swizzle(temp, i, 1))));
}
for (/* empty */; i < 4; i++)
expanded_src[i] = expanded_src[0];
}
/**
* Convert a series of uvec2 results into a single 64-bit integer vector
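*
* For example (a rough sketch; temporary names are illustrative), a u64vec2
* result built from uvec2 temporaries d0 and d1 becomes
*
*    u64vec2 compacted;
*    compacted.x = packUint2x32(d0);
*    compacted.y = packUint2x32(d1);
*
* with each component written through a separate masked assignment.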
*/
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
const glsl_type *type,
ir_variable *result[4])
{
const ir_expression_operation pack_opcode =
type->base_type == GLSL_TYPE_UINT64
? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
ir_variable *const compacted_result =
body.make_temp(type, "compacted_64bit_result");
for (unsigned i = 0; i < type->vector_elements; i++) {
body.emit(assign(compacted_result,
expr(pack_opcode, result[i]),
1U << i));
}
void *const mem_ctx = ralloc_parent(compacted_result);
return new(mem_ctx) ir_dereference_variable(compacted_result);
}
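/**
* Rewrite a 64-bit expression as calls to a uvec2/ivec2 builtin
*
* Each operand is expanded with expand_source(), one call to \p callee is
* emitted per component of the widest operand, and the per-component results
* are recombined with compact_destination().  The generated instructions are
* spliced into the stream just before \p base_ir, and a dereference of the
* compacted result is returned to replace \p ir.
*
* A rough sketch of the emitted code for a u64vec2 unsigned division (names
* are illustrative):
*
*    uvec2 a0 = unpackUint2x32(a.x), a1 = unpackUint2x32(a.y);
*    uvec2 b0 = unpackUint2x32(b.x), b1 = unpackUint2x32(b.y);
*    uvec2 d0 = __builtin_udiv64(a0, b0);
*    uvec2 d1 = __builtin_udiv64(a1, b1);
*    result.x = packUint2x32(d0);
*    result.y = packUint2x32(d1);
*/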
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
ir_function_signature *callee)
{
const unsigned num_operands = ir->num_operands;
ir_variable *src[4][4];
ir_variable *dst[4];
void *const mem_ctx = ralloc_parent(ir);
exec_list instructions;
unsigned source_components = 0;
const glsl_type *const result_type =
ir->type->base_type == GLSL_TYPE_UINT64
? glsl_type::uvec2_type : glsl_type::ivec2_type;
ir_factory body(&instructions, mem_ctx);
for (unsigned i = 0; i < num_operands; i++) {
expand_source(body, ir->operands[i], src[i]);
if (ir->operands[i]->type->vector_elements > source_components)
source_components = ir->operands[i]->type->vector_elements;
}
for (unsigned i = 0; i < source_components; i++) {
dst[i] = body.make_temp(result_type, "expanded_64bit_result");
exec_list parameters;
for (unsigned j = 0; j < num_operands; j++)
parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));
ir_dereference_variable *const return_deref =
new(mem_ctx) ir_dereference_variable(dst[i]);
ir_call *const c = new(mem_ctx) ir_call(callee,
return_deref,
&parameters);
body.emit(c);
}
ir_rvalue *const rv = compact_destination(body, ir->type, dst);
/* Move all of the nodes from instructions to the point between base_ir
* and the instruction before it.
*/
exec_node *const after = base_ir;
exec_node *const before = after->prev;
exec_node *const head = instructions.head_sentinel.next;
exec_node *const tail = instructions.tail_sentinel.prev;
before->next = head;
head->prev = before;
after->prev = tail;
tail->next = after;
return rv;
}
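/**
* Lower a single expression to a call of the named __builtin_* function
*
* If a function named \p function_name already exists in the IR, its first
* (and only) signature is reused; otherwise \p generator is invoked to
* synthesize one and the new function is added to the lookup table.
* Expressions whose operands are not all 64-bit integers are returned
* unmodified.
*/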
ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
const char *function_name,
function_generator generator)
{
for (unsigned i = 0; i < ir->num_operands; i++)
if (!ir->operands[i]->type->is_integer_64())
return ir;
/* Get a handle to the correct ir_function_signature for the core
* operation.
*/
ir_function_signature *callee = NULL;
ir_function *f = find_function(function_name);
if (f != NULL) {
callee = (ir_function_signature *) f->signatures.get_head();
assert(callee != NULL && callee->ir_type == ir_type_function_signature);
} else {
f = new(base_ir) ir_function(function_name);
callee = generator(base_ir, NULL);
f->add_signature(callee);
add_function(f);
}
this->progress = true;
return lower_op_to_function_call(this->base_ir, ir, callee);
}
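/**
* Visitor callback invoked for every rvalue in the IR
*
* Only ir_expression nodes whose operation matches one of the enabled
* lowering flags (SIGN64, DIV64, MOD64, MUL64) are rewritten; all other
* rvalues are left untouched.
*/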
void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
return;
ir_expression *const ir = (*rvalue)->as_expression();
assert(ir != NULL);
switch (ir->operation) {
case ir_unop_sign:
if (lowering(SIGN64)) {
*rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
}
break;
case ir_binop_div:
if (lowering(DIV64)) {
if (ir->type->base_type == GLSL_TYPE_UINT64) {
*rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
} else {
*rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
}
}
break;
case ir_binop_mod:
if (lowering(MOD64)) {
if (ir->type->base_type == GLSL_TYPE_UINT64) {
*rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
} else {
*rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
}
}
break;
case ir_binop_mul:
if (lowering(MUL64)) {
*rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
}
break;
default:
break;
}
}