// src/mesa/drivers/dri/i965/brw_vec4_builder.h - platform/external/mesa3d - Git at Google

 /* -*- c++ -*- */
 /*
  * Copyright © 2010-2015 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 #ifndef BRW_VEC4_BUILDER_H
 #define BRW_VEC4_BUILDER_H

 #include "brw_ir_vec4.h"
 #include "brw_ir_allocator.h"
 #include "brw_context.h"

 namespace brw {
    /**
     * Toolbox to assemble a VEC4 IR program out of individual instructions.
     *
     * This object is meant to have an interface consistent with
     * brw::fs_builder.  They cannot be fully interchangeable because
     * brw::fs_builder generates scalar code while brw::vec4_builder generates
     * vector code.
     */
    class vec4_builder {
    public:
       /**
        * Type used in this IR to represent a source of an instruction.
        * Re-exported under the builder so it can be named as
        * vec4_builder::src_reg.
        */
       typedef brw::src_reg src_reg;

       /**
        * Type used in this IR to represent the destination of an instruction.
        * Re-exported under the builder so it can be named as
        * vec4_builder::dst_reg.
        */
       typedef brw::dst_reg dst_reg;

       /**
        * Type used in this IR to represent an instruction.  The emit()
        * overloads of this class create and return pointers to this type.
        */
       typedef vec4_instruction instruction;

       /**
        * Create a vec4_builder for \p shader that has no insertion point yet.
        *
        * The basic block and the cursor are both NULL, per-channel execution
        * masking stays enabled and the debug annotation is value-initialized.
        * A cursor has to be supplied through at() or at_end() before anything
        * is emitted, because emit() dereferences it.
        */
       vec4_builder(backend_shader *shader)
          : shader(shader),
            block(NULL),
            cursor(NULL),
            force_writemask_all(false),
            annotation()
       {
       }

       /**
        * Create a vec4_builder for \p shader whose insertion point is right
        * before \p inst inside basic block \p block.
        *
        * The force_writemask_all setting and the debug annotation (string and
        * IR pointer) are copied from \p inst, so instructions emitted through
        * this builder get the same values.
        */
       vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst)
          : shader(shader),
            block(block),
            cursor(inst),
            force_writemask_all(inst->force_writemask_all)
       {
          annotation.ir = inst->ir;
          annotation.str = inst->annotation;
       }

       /**
        * Return a copy of this builder repositioned so that it inserts
        * instructions before \p cursor in basic block \p block.  Every other
        * code generation parameter is inherited unchanged.
        */
       vec4_builder
       at(bblock_t *block, exec_node *cursor) const
       {
          vec4_builder moved(*this);
          moved.cursor = cursor;
          moved.block = block;
          return moved;
       }

       /**
        * Return a copy of this builder that appends at the very end of the
        * shader's instruction list.
        *
        * The cursor becomes the list's tail sentinel and the new builder has
        * no basic block associated with it; everything else is inherited.
        */
       vec4_builder
       at_end() const
       {
          exec_node *const tail =
             (exec_node *)&shader->instructions.tail_sentinel;

          return at(NULL, tail);
       }

       /**
        * Return a copy of this builder with per-channel control flow
        * execution masking disabled when \p b is true.
        *
        * Passing false never re-enables masking: the copy then simply keeps
        * whatever setting this builder already has.
        */
       vec4_builder
       exec_all(bool b = true) const
       {
          vec4_builder unmasked(*this);
          unmasked.force_writemask_all = force_writemask_all || b;
          return unmasked;
       }

       /**
        * Return a copy of this builder that tags emitted instructions with
        * the debug annotation string \p str and the IR pointer \p ir.
        */
       vec4_builder
       annotate(const char *str, const void *ir = NULL) const
       {
          vec4_builder tagged(*this);
          tagged.annotation.ir = ir;
          tagged.annotation.str = str;
          return tagged;
       }

       /**
        * Get the SIMD width in use.  This builder always reports 8.
        */
       unsigned
       dispatch_width() const
       {
          const unsigned simd_width = 8;
          return simd_width;
       }

       /**
        * Allocate a virtual register of natural vector size (four for this IR)
        * and SIMD width.  \p n gives the amount of space to allocate in
        * dispatch_width units (which is just enough space for four logical
        * components in this IR); each unit is scaled by the size of \p type
        * rounded up to a multiple of four bytes.
        *
        * When \p n is zero nothing is allocated and a null register retyped
        * to \p type is returned instead.
        */
       dst_reg
       vgrf(enum brw_reg_type type, unsigned n = 1) const
       {
          assert(dispatch_width() <= 32);

          /* Nothing to allocate: hand back a null register of that type. */
          if (n == 0)
             return retype(null_reg_ud(), type);

          return retype(
             dst_reg(VGRF,
                     shader->alloc.allocate(n * DIV_ROUND_UP(type_sz(type), 4))),
             type);
       }

       /**
        * Create a null register of floating type, sized for the builder's
        * dispatch width.
        */
       dst_reg
       null_reg_f() const
       {
          const unsigned width = dispatch_width();

          return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_F));
       }

       /**
        * Create a null register of signed integer type, sized for the
        * builder's dispatch width.
        */
       dst_reg
       null_reg_d() const
       {
          const unsigned width = dispatch_width();

          return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_D));
       }

       /**
        * Create a null register of unsigned integer type, sized for the
        * builder's dispatch width.
        */
       dst_reg
       null_reg_ud() const
       {
          const unsigned width = dispatch_width();

          return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_UD));
       }

       /**
        * Insert a copy of \p inst into the program.
        *
        * The copy is allocated out of the shader's memory context and then
        * handed to the pointer overload of emit(), which returns it.
        */
       instruction *
       emit(const instruction &inst) const
       {
          instruction *const copy = new(shader->mem_ctx) instruction(inst);

          return emit(copy);
       }

       /**
        * Create and insert a nullary control instruction (an opcode with no
        * destination and no sources) into the program.
        */
       instruction *
       emit(enum opcode opcode) const
       {
          const instruction nullary(opcode);

          return emit(nullary);
       }

       /**
        * Create and insert a nullary instruction (a destination but no
        * sources) into the program.
        */
       instruction *
       emit(enum opcode opcode, const dst_reg &dst) const
       {
          const instruction nullary(opcode, dst);

          return emit(nullary);
       }

       /**
        * Create and insert a unary instruction into the program.
        *
        * The single-source math opcodes (RCP, RSQ, SQRT, EXP2, LOG2, SIN, COS)
        * get their operand passed through fix_math_operand() and the emitted
        * instruction through fix_math_instruction(); any other opcode is
        * emitted as given.
        */
       instruction *
       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
       {
          const bool is_math = (opcode == SHADER_OPCODE_RCP ||
                                opcode == SHADER_OPCODE_RSQ ||
                                opcode == SHADER_OPCODE_SQRT ||
                                opcode == SHADER_OPCODE_EXP2 ||
                                opcode == SHADER_OPCODE_LOG2 ||
                                opcode == SHADER_OPCODE_SIN ||
                                opcode == SHADER_OPCODE_COS);

          if (!is_math)
             return emit(instruction(opcode, dst, src0));

          /* Fix up the operand first so any copy lands ahead of the math. */
          const src_reg operand = fix_math_operand(src0);
          instruction *const math = emit(instruction(opcode, dst, operand));

          return fix_math_instruction(math);
       }

       /**
        * Create and insert a binary instruction into the program.
        *
        * The two-source math opcodes (POW, INT_QUOTIENT, INT_REMAINDER) get
        * both operands passed through fix_math_operand() and the emitted
        * instruction through fix_math_instruction(); any other opcode is
        * emitted as given.
        */
       instruction *
       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
            const src_reg &src1) const
       {
          const bool is_math = (opcode == SHADER_OPCODE_POW ||
                                opcode == SHADER_OPCODE_INT_QUOTIENT ||
                                opcode == SHADER_OPCODE_INT_REMAINDER);

          if (!is_math)
             return emit(instruction(opcode, dst, src0, src1));

          /* Both operand fix-ups are deliberately kept as arguments of one
           * call: the relative order in which they run (and hence the order
           * of any copies they emit) is left to the compiler.
           */
          instruction *const math =
             emit(instruction(opcode, dst,
                              fix_math_operand(src0),
                              fix_math_operand(src1)));

          return fix_math_instruction(math);
       }

       /**
        * Create and insert a ternary instruction into the program.
        *
        * For BFE, BFI2, MAD and LRP every source is passed through
        * fix_3src_operand() first; any other opcode is emitted as given.
        */
       instruction *
       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
            const src_reg &src1, const src_reg &src2) const
       {
          const bool is_3src = (opcode == BRW_OPCODE_BFE ||
                                opcode == BRW_OPCODE_BFI2 ||
                                opcode == BRW_OPCODE_MAD ||
                                opcode == BRW_OPCODE_LRP);

          if (!is_3src)
             return emit(instruction(opcode, dst, src0, src1, src2));

          /* The three fix-ups stay arguments of a single call, so the order
           * in which they run is left to the compiler.
           */
          return emit(instruction(opcode, dst,
                                  fix_3src_operand(src0),
                                  fix_3src_operand(src1),
                                  fix_3src_operand(src2)));
       }

       /**
        * Insert a preallocated instruction into the program.
        *
        * The builder's force_writemask_all flag and debug annotation are
        * stamped onto \p inst, overwriting whatever it carried.  The
        * instruction is then inserted before the cursor, using the
        * block-aware insertion when the builder has a basic block.
        *
        * \return \p inst.
        */
       instruction *
       emit(instruction *inst) const
       {
          inst->ir = annotation.ir;
          inst->annotation = annotation.str;
          inst->force_writemask_all = force_writemask_all;

          if (block == NULL)
             cursor->insert_before(inst);
          else
             static_cast<instruction *>(cursor)->insert_before(block, inst);

          return inst;
       }

       /**
        * Select \p src0 if the comparison of both sources with the given
        * conditional mod evaluates to true, otherwise select \p src1.
        *
        * Generally useful to get the minimum or maximum of two values.  Only
        * BRW_CONDITIONAL_GE and BRW_CONDITIONAL_L are accepted as \p mod.
        * Both sources go through fix_unsigned_negate() before the SEL.
        */
       instruction *
       emit_minmax(const dst_reg &dst, const src_reg &src0,
                   const src_reg &src1, brw_conditional_mod mod) const
       {
          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);

          instruction *const sel = SEL(dst,
                                       fix_unsigned_negate(src0),
                                       fix_unsigned_negate(src1));

          return set_condmod(mod, sel);
       }

       /**
        * Copy any live channel from \p src to the first channel of the result.
        *
        * Emits a FIND_LIVE_CHANNEL into a freshly allocated UD register
        * (X component only) followed by a BROADCAST of \p src using that
        * index.  Both instructions are emitted with execution masking
        * disabled.
        *
        * \return the newly allocated register holding the broadcast value.
        */
       src_reg
       emit_uniformize(const src_reg &src) const
       {
          const vec4_builder all_channels = exec_all();
          const dst_reg live_index =
             writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
          const dst_reg result = vgrf(src.type);

          all_channels.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, live_index);
          all_channels.emit(SHADER_OPCODE_BROADCAST, result, src,
                            src_reg(live_index));

          return src_reg(result);
       }

       /**
        * Assorted arithmetic ops.
        *
        * Every ALUn(X) line below defines a member function X() taking a
        * destination and n sources that forwards to emit(BRW_OPCODE_X, ...).
        * The ALU2_ACC flavour additionally marks the emitted instruction as
        * writing the accumulator.
        * @{
        */
 #define ALU1(name)                                                      \
       instruction *                                                     \
       name(const dst_reg &dst, const src_reg &src0) const               \
       {                                                                 \
          return emit(BRW_OPCODE_##name, dst, src0);                     \
       }

 #define ALU2(name)                                                      \
       instruction *                                                     \
       name(const dst_reg &dst, const src_reg &src0,                     \
            const src_reg &src1) const                                   \
       {                                                                 \
          return emit(BRW_OPCODE_##name, dst, src0, src1);               \
       }

 #define ALU2_ACC(name)                                                  \
       instruction *                                                     \
       name(const dst_reg &dst, const src_reg &src0,                     \
            const src_reg &src1) const                                   \
       {                                                                 \
          instruction *emitted = emit(BRW_OPCODE_##name, dst, src0, src1); \
          emitted->writes_accumulator = true;                            \
          return emitted;                                                \
       }

 #define ALU3(name)                                                      \
       instruction *                                                     \
       name(const dst_reg &dst, const src_reg &src0,                     \
            const src_reg &src1, const src_reg &src2) const              \
       {                                                                 \
          return emit(BRW_OPCODE_##name, dst, src0, src1, src2);         \
       }

       ALU2(ADD)
       ALU2_ACC(ADDC)
       ALU2(AND)
       ALU2(ASR)
       ALU2(AVG)
       ALU3(BFE)
       ALU2(BFI1)
       ALU3(BFI2)
       ALU1(BFREV)
       ALU1(CBIT)
       ALU2(CMPN)
       ALU3(CSEL)
       ALU1(DIM)
       ALU2(DP2)
       ALU2(DP3)
       ALU2(DP4)
       ALU2(DPH)
       ALU1(F16TO32)
       ALU1(F32TO16)
       ALU1(FBH)
       ALU1(FBL)
       ALU1(FRC)
       ALU2(LINE)
       ALU1(LZD)
       ALU2(MAC)
       ALU2_ACC(MACH)
       ALU3(MAD)
       ALU1(MOV)
       ALU2(MUL)
       ALU1(NOT)
       ALU2(OR)
       ALU2(PLN)
       ALU1(RNDD)
       ALU1(RNDE)
       ALU1(RNDU)
       ALU1(RNDZ)
       ALU2(SAD2)
       ALU2_ACC(SADA2)
       ALU2(SEL)
       ALU2(SHL)
       ALU2(SHR)
       ALU2_ACC(SUBB)
       ALU2(XOR)

 #undef ALU1
 #undef ALU2
 #undef ALU2_ACC
 #undef ALU3
       /** @} */

       /**
        * CMP: Sets the low bit of the destination channels with the result
        * of the comparison, while the upper bits are undefined, and updates
        * the flag register with the packed 16 bits of the result.
        *
        * The destination is retyped to the type of \p src0 and both sources
        * go through fix_unsigned_negate() before the instruction is emitted.
        */
       instruction *
       CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
           brw_conditional_mod condition) const
       {
          /* Consider
           *
           *    CMP null<d> src0<f> src1<f>
           *
           * Original gen4 converts to the destination type before comparing,
           * which yields garbage for floating point comparisons.  On newer
           * generations the destination type doesn't matter, so matching it
           * to src0 is always fine and lets the instruction be compacted.
           */
          instruction *const cmp = emit(BRW_OPCODE_CMP,
                                        retype(dst, src0.type),
                                        fix_unsigned_negate(src0),
                                        fix_unsigned_negate(src1));

          return set_condmod(condition, cmp);
       }

       /**
        * Gen4 predicated IF: emits an IF instruction and attaches
        * \p predicate to it.
        */
       instruction *
       IF(brw_predicate predicate) const
       {
          instruction *const branch = emit(BRW_OPCODE_IF);

          return set_predicate(predicate, branch);
       }

       /**
        * Gen6 IF with embedded comparison.
        *
        * Emits an IF with a null signed-integer destination that compares
        * \p src0 against \p src1 using \p condition.  Asserts that the device
        * is gen6; both sources go through fix_unsigned_negate().
        */
       instruction *
       IF(const src_reg &src0, const src_reg &src1,
          brw_conditional_mod condition) const
       {
          assert(shader->devinfo->gen == 6);

          instruction *const branch = emit(BRW_OPCODE_IF,
                                           null_reg_d(),
                                           fix_unsigned_negate(src0),
                                           fix_unsigned_negate(src1));

          return set_condmod(condition, branch);
       }

       /**
        * Emit a linear interpolation, i.e. x*(1-a) + y*a, into \p dst.
        *
        * On gen6 and later a single LRP instruction is used.  Earlier
        * generations get an equivalent MUL/ADD/MUL/ADD sequence through three
        * temporaries, and the final ADD is what gets returned.
        */
       instruction *
       LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
           const src_reg &a) const
       {
          if (shader->devinfo->gen < 6) {
             /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
             const dst_reg ya = vgrf(dst.type);
             const dst_reg inv_a = vgrf(dst.type);
             const dst_reg x_inv_a = vgrf(dst.type);

             MUL(ya, y, a);
             ADD(inv_a, negate(a), brw_imm_f(1.0f));
             MUL(x_inv_a, x, src_reg(inv_a));
             return ADD(dst, src_reg(x_inv_a), src_reg(ya));
          }

          /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
           * the interpolation factor goes first and x/y trade places.
           */
          return emit(BRW_OPCODE_LRP, dst, a, y, x);
       }

       /**
        * Shader the builder inserts instructions into.  The builder also
        * reads its register allocator, memory context, device info and
        * instruction list.
        */
       backend_shader *shader;

    protected:
       /**
        * Workaround for negation of UD registers.  See comment in
        * fs_generator::generate_code() for the details.
        *
        * A negated UD source is first copied with a MOV into a fresh UD
        * temporary, which is returned in its place; any other source is
        * returned untouched.
        */
       src_reg
       fix_unsigned_negate(const src_reg &src) const
       {
          if (src.type != BRW_REGISTER_TYPE_UD || !src.negate)
             return src;

          dst_reg resolved = vgrf(BRW_REGISTER_TYPE_UD);
          MOV(resolved, src);
          return src_reg(resolved);
       }

       /**
        * Workaround for register access modes not supported by the ternary
        * instruction encoding.
        *
        * Immediates, and uniforms whose swizzle is not a single value, are
        * expanded into a temporary with VEC4_OPCODE_UNPACK_UNIFORM; the
        * temporary is returned in their place.  Everything else is returned
        * as is.
        */
       src_reg
       fix_3src_operand(const src_reg &src) const
       {
          /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like
           * to be able to use vertical stride of zero to replicate the vec4
           * uniform, like
           *
           *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
           *
           * But you can't, since vertical stride is always four in
           * three-source instructions. Instead, insert a MOV instruction to
           * do the replication so that the three-source instruction can
           * consume it.
           */
          const bool is_uniform = (src.file == UNIFORM);
          const bool is_immediate = (src.file == IMM);

          /* Only uniforms and immediates ever need the replication. */
          if (!is_uniform && !is_immediate)
             return src;

          if (is_uniform && brw_is_single_value_swizzle(src.swizzle))
             return src;

          const dst_reg replicated = vgrf(src.type);
          emit(VEC4_OPCODE_UNPACK_UNIFORM, replicated, src);
          return src_reg(replicated);
       }

       /**
        * Workaround for register access modes not supported by the math
        * instruction.
        *
        * Returns either \p src itself or a temporary that \p src has been
        * copied into with a MOV, depending on the hardware generation.
        */
       src_reg
       fix_math_operand(const src_reg &src) const
       {
          /* The gen6 math instruction ignores the source modifiers --
           * swizzle, abs, negate, and at least some parts of the register
           * region description.  Rather than enumerating all those cases the
           * operand is *always* expanded to a temp GRF on gen6.
           *
           * Gen7 keeps the operand as-is unless it is an immediate, which
           * gen7 still can't use.
           */
          const bool needs_copy =
             shader->devinfo->gen == 6 ||
             (shader->devinfo->gen == 7 && src.file == IMM);

          if (!needs_copy)
             return src;

          const dst_reg copy = vgrf(src.type);
          MOV(copy, src);
          return src_reg(copy);
       }

       /**
        * Workaround other weirdness of the math instruction.
        *
        * - Before gen6: base_mrf is set to 1 and mlen to the number of
        *   sources (one when src[1] is BAD_FILE, two otherwise).
        * - On gen6 with a partial destination writemask: the math instruction
        *   is redirected to a fresh temporary and a MOV from that temporary
        *   to the original destination is emitted.
        *
        * \return \p inst.
        */
       instruction *
       fix_math_instruction(instruction *inst) const
       {
          if (shader->devinfo->gen < 6) {
             inst->base_mrf = 1;
             inst->mlen = (inst->src[1].file == BAD_FILE ? 1 : 2);

          } else if (shader->devinfo->gen == 6 &&
                     inst->dst.writemask != WRITEMASK_XYZW) {
             const dst_reg scratch = vgrf(inst->dst.type);

             /* Copy out to the real destination, then retarget the math. */
             MOV(inst->dst, src_reg(scratch));
             inst->dst = scratch;
          }

          return inst;
       }

       /** Basic block instructions are inserted into, or NULL when the
        *  builder inserts relative to the cursor alone. */
       bblock_t *block;
       /** List node that new instructions are inserted in front of. */
       exec_node *cursor;

       /** Value copied into force_writemask_all of every emitted
        *  instruction. */
       bool force_writemask_all;

       /** Debug annotation info copied onto every emitted instruction. */
       struct {
          const char *str;
          const void *ir;
       } annotation;
    };
 }

 #endif
	/* -*- c++ -*- */
	/*
	* Copyright © 2010-2015 Intel Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	#ifndef BRW_VEC4_BUILDER_H
	#define BRW_VEC4_BUILDER_H

	#include "brw_ir_vec4.h"
	#include "brw_ir_allocator.h"
	#include "brw_context.h"

	namespace brw {
	/**
	* Toolbox to assemble a VEC4 IR program out of individual instructions.
	*
	* This object is meant to have an interface consistent with
	* brw::fs_builder. They cannot be fully interchangeable because
	* brw::fs_builder generates scalar code while brw::vec4_builder generates
	* vector code.
	*/
	class vec4_builder {
	public:
	/**
	 * Type used in this IR to represent a source of an instruction.
	 * Re-exported under the builder so it can be named as
	 * vec4_builder::src_reg.
	 */
	typedef brw::src_reg src_reg;

	/**
	 * Type used in this IR to represent the destination of an instruction.
	 * Re-exported under the builder so it can be named as
	 * vec4_builder::dst_reg.
	 */
	typedef brw::dst_reg dst_reg;

	/**
	 * Type used in this IR to represent an instruction.  The emit()
	 * overloads of this class create and return pointers to this type.
	 */
	typedef vec4_instruction instruction;

	/**
	 * Create a vec4_builder for \p shader that has no insertion point yet.
	 *
	 * The basic block and the cursor are both NULL, per-channel execution
	 * masking stays enabled and the debug annotation is value-initialized.
	 * A cursor has to be supplied through at() or at_end() before anything
	 * is emitted, because emit() dereferences it.
	 */
	vec4_builder(backend_shader *shader)
	   : shader(shader),
	     block(NULL),
	     cursor(NULL),
	     force_writemask_all(false),
	     annotation()
	{
	}

	/**
	 * Construct a vec4_builder that inserts instructions into \p shader
	 * before instruction \p inst in basic block \p block.  The default
	 * execution controls and debug annotation are initialized from the
	 * instruction passed as argument.
	 *
	 * \p shader and \p block are taken by pointer: they initialize the
	 * pointer members of the same name.
	 */
	vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
	   shader(shader), block(block), cursor(inst),
	   force_writemask_all(inst->force_writemask_all)
	{
	   annotation.str = inst->annotation;
	   annotation.ir = inst->ir;
	}

	/**
	 * Construct a vec4_builder that inserts instructions before \p cursor
	 * in basic block \p block, inheriting other code generation parameters
	 * from this.
	 *
	 * Both arguments are pointers; \p block may be NULL (at_end() passes
	 * NULL) to insert relative to the cursor alone.
	 */
	vec4_builder
	at(bblock_t *block, exec_node *cursor) const
	{
	   vec4_builder bld = *this;
	   bld.block = block;
	   bld.cursor = cursor;
	   return bld;
	}

	/**
	 * Return a copy of this builder that appends at the very end of the
	 * shader's instruction list.
	 *
	 * The cursor becomes the list's tail sentinel and the new builder has
	 * no basic block associated with it; everything else is inherited.
	 */
	vec4_builder
	at_end() const
	{
	   exec_node *const tail =
	      (exec_node *)&shader->instructions.tail_sentinel;

	   return at(NULL, tail);
	}

	/**
	 * Return a copy of this builder with per-channel control flow
	 * execution masking disabled when \p b is true.
	 *
	 * Passing false never re-enables masking: the copy then simply keeps
	 * whatever setting this builder already has.
	 */
	vec4_builder
	exec_all(bool b = true) const
	{
	   vec4_builder unmasked(*this);
	   unmasked.force_writemask_all = force_writemask_all || b;
	   return unmasked;
	}

	/**
	 * Construct a builder with the given debug annotation info.
	 *
	 * \p str is the annotation string and \p ir an optional opaque IR
	 * pointer; both are stored as pointers in the annotation member and
	 * copied onto every instruction emitted through the returned builder.
	 */
	vec4_builder
	annotate(const char *str, const void *ir = NULL) const
	{
	   vec4_builder bld = *this;
	   bld.annotation.str = str;
	   bld.annotation.ir = ir;
	   return bld;
	}

	/**
	 * Get the SIMD width in use.  This builder always reports 8.
	 */
	unsigned
	dispatch_width() const
	{
	   const unsigned simd_width = 8;
	   return simd_width;
	}

	/**
	 * Allocate a virtual register of natural vector size (four for this IR)
	 * and SIMD width.  \p n gives the amount of space to allocate in
	 * dispatch_width units (which is just enough space for four logical
	 * components in this IR); each unit is scaled by the size of \p type
	 * rounded up to a multiple of four bytes.
	 *
	 * When \p n is zero nothing is allocated and a null register retyped
	 * to \p type is returned instead.
	 */
	dst_reg
	vgrf(enum brw_reg_type type, unsigned n = 1) const
	{
	   assert(dispatch_width() <= 32);

	   /* Nothing to allocate: hand back a null register of that type. */
	   if (n == 0)
	      return retype(null_reg_ud(), type);

	   return retype(
	      dst_reg(VGRF,
	              shader->alloc.allocate(n * DIV_ROUND_UP(type_sz(type), 4))),
	      type);
	}

	/**
	 * Create a null register of floating type, sized for the builder's
	 * dispatch width.
	 */
	dst_reg
	null_reg_f() const
	{
	   const unsigned width = dispatch_width();

	   return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_F));
	}

	/**
	 * Create a null register of signed integer type, sized for the
	 * builder's dispatch width.
	 */
	dst_reg
	null_reg_d() const
	{
	   const unsigned width = dispatch_width();

	   return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_D));
	}

	/**
	 * Create a null register of unsigned integer type, sized for the
	 * builder's dispatch width.
	 */
	dst_reg
	null_reg_ud() const
	{
	   const unsigned width = dispatch_width();

	   return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_UD));
	}

	/**
	 * Insert a copy of \p inst into the program.
	 *
	 * The copy is allocated out of the shader's memory context and then
	 * handed to the pointer overload of emit(), which returns it.
	 */
	instruction *
	emit(const instruction &inst) const
	{
	   instruction *const copy = new(shader->mem_ctx) instruction(inst);

	   return emit(copy);
	}

	/**
	 * Create and insert a nullary control instruction (an opcode with no
	 * destination and no sources) into the program.
	 */
	instruction *
	emit(enum opcode opcode) const
	{
	   const instruction nullary(opcode);

	   return emit(nullary);
	}

	/**
	 * Create and insert a nullary instruction (a destination but no
	 * sources) into the program.
	 */
	instruction *
	emit(enum opcode opcode, const dst_reg &dst) const
	{
	   const instruction nullary(opcode, dst);

	   return emit(nullary);
	}

	/**
	 * Create and insert a unary instruction into the program.
	 *
	 * The single-source math opcodes (RCP, RSQ, SQRT, EXP2, LOG2, SIN, COS)
	 * get their operand passed through fix_math_operand() and the emitted
	 * instruction through fix_math_instruction(); any other opcode is
	 * emitted as given.
	 */
	instruction *
	emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
	{
	   const bool is_math = (opcode == SHADER_OPCODE_RCP ||
	                         opcode == SHADER_OPCODE_RSQ ||
	                         opcode == SHADER_OPCODE_SQRT ||
	                         opcode == SHADER_OPCODE_EXP2 ||
	                         opcode == SHADER_OPCODE_LOG2 ||
	                         opcode == SHADER_OPCODE_SIN ||
	                         opcode == SHADER_OPCODE_COS);

	   if (!is_math)
	      return emit(instruction(opcode, dst, src0));

	   /* Fix up the operand first so any copy lands ahead of the math. */
	   const src_reg operand = fix_math_operand(src0);
	   instruction *const math = emit(instruction(opcode, dst, operand));

	   return fix_math_instruction(math);
	}

	/**
	 * Create and insert a binary instruction into the program.
	 *
	 * The two-source math opcodes (POW, INT_QUOTIENT, INT_REMAINDER) get
	 * both operands passed through fix_math_operand() and the emitted
	 * instruction through fix_math_instruction(); any other opcode is
	 * emitted as given.
	 */
	instruction *
	emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
	     const src_reg &src1) const
	{
	   const bool is_math = (opcode == SHADER_OPCODE_POW ||
	                         opcode == SHADER_OPCODE_INT_QUOTIENT ||
	                         opcode == SHADER_OPCODE_INT_REMAINDER);

	   if (!is_math)
	      return emit(instruction(opcode, dst, src0, src1));

	   /* Both operand fix-ups are deliberately kept as arguments of one
	    * call: the relative order in which they run (and hence the order
	    * of any copies they emit) is left to the compiler.
	    */
	   instruction *const math =
	      emit(instruction(opcode, dst,
	                       fix_math_operand(src0),
	                       fix_math_operand(src1)));

	   return fix_math_instruction(math);
	}

	/**
	 * Create and insert a ternary instruction into the program.
	 *
	 * For BFE, BFI2, MAD and LRP every source is passed through
	 * fix_3src_operand() first; any other opcode is emitted as given.
	 */
	instruction *
	emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
	     const src_reg &src1, const src_reg &src2) const
	{
	   const bool is_3src = (opcode == BRW_OPCODE_BFE ||
	                         opcode == BRW_OPCODE_BFI2 ||
	                         opcode == BRW_OPCODE_MAD ||
	                         opcode == BRW_OPCODE_LRP);

	   if (!is_3src)
	      return emit(instruction(opcode, dst, src0, src1, src2));

	   /* The three fix-ups stay arguments of a single call, so the order
	    * in which they run is left to the compiler.
	    */
	   return emit(instruction(opcode, dst,
	                           fix_3src_operand(src0),
	                           fix_3src_operand(src1),
	                           fix_3src_operand(src2)));
	}

	/**
	 * Insert a preallocated instruction into the program.
	 *
	 * The builder's force_writemask_all flag and debug annotation are
	 * stamped onto \p inst, overwriting whatever it carried.  The
	 * instruction is then inserted before the cursor, using the
	 * block-aware insertion when the builder has a basic block.
	 *
	 * \return \p inst.
	 */
	instruction *
	emit(instruction *inst) const
	{
	   inst->ir = annotation.ir;
	   inst->annotation = annotation.str;
	   inst->force_writemask_all = force_writemask_all;

	   if (block == NULL)
	      cursor->insert_before(inst);
	   else
	      static_cast<instruction *>(cursor)->insert_before(block, inst);

	   return inst;
	}

	/**
	 * Select \p src0 if the comparison of both sources with the given
	 * conditional mod evaluates to true, otherwise select \p src1.
	 *
	 * Generally useful to get the minimum or maximum of two values.  Only
	 * BRW_CONDITIONAL_GE and BRW_CONDITIONAL_L are accepted as \p mod.
	 * Both sources go through fix_unsigned_negate() before the SEL.
	 */
	instruction *
	emit_minmax(const dst_reg &dst, const src_reg &src0,
	            const src_reg &src1, brw_conditional_mod mod) const
	{
	   assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);

	   return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
	                               fix_unsigned_negate(src1)));
	}

	/**
	 * Copy any live channel from \p src to the first channel of the result.
	 *
	 * Emits a FIND_LIVE_CHANNEL into a freshly allocated UD register
	 * (X component only) followed by a BROADCAST of \p src using that
	 * index.  Both instructions are emitted with execution masking
	 * disabled.
	 *
	 * \return the newly allocated register holding the broadcast value.
	 */
	src_reg
	emit_uniformize(const src_reg &src) const
	{
	   const vec4_builder all_channels = exec_all();
	   const dst_reg live_index =
	      writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
	   const dst_reg result = vgrf(src.type);

	   all_channels.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, live_index);
	   all_channels.emit(SHADER_OPCODE_BROADCAST, result, src,
	                     src_reg(live_index));

	   return src_reg(result);
	}

	/**
	 * Assorted arithmetic ops.
	 *
	 * Every ALUn(X) line below defines a member function X() taking a
	 * destination and n sources that forwards to emit(BRW_OPCODE_X, ...).
	 * The ALU2_ACC flavour additionally marks the emitted instruction as
	 * writing the accumulator.
	 * @{
	 */
	#define ALU1(name)                                                   \
	   instruction *                                                     \
	   name(const dst_reg &dst, const src_reg &src0) const               \
	   {                                                                 \
	      return emit(BRW_OPCODE_##name, dst, src0);                     \
	   }

	#define ALU2(name)                                                   \
	   instruction *                                                     \
	   name(const dst_reg &dst, const src_reg &src0,                     \
	        const src_reg &src1) const                                   \
	   {                                                                 \
	      return emit(BRW_OPCODE_##name, dst, src0, src1);               \
	   }

	#define ALU2_ACC(name)                                               \
	   instruction *                                                     \
	   name(const dst_reg &dst, const src_reg &src0,                     \
	        const src_reg &src1) const                                   \
	   {                                                                 \
	      instruction *emitted = emit(BRW_OPCODE_##name, dst, src0, src1); \
	      emitted->writes_accumulator = true;                            \
	      return emitted;                                                \
	   }

	#define ALU3(name)                                                   \
	   instruction *                                                     \
	   name(const dst_reg &dst, const src_reg &src0,                     \
	        const src_reg &src1, const src_reg &src2) const              \
	   {                                                                 \
	      return emit(BRW_OPCODE_##name, dst, src0, src1, src2);         \
	   }

	ALU2(ADD)
	ALU2_ACC(ADDC)
	ALU2(AND)
	ALU2(ASR)
	ALU2(AVG)
	ALU3(BFE)
	ALU2(BFI1)
	ALU3(BFI2)
	ALU1(BFREV)
	ALU1(CBIT)
	ALU2(CMPN)
	ALU3(CSEL)
	ALU1(DIM)
	ALU2(DP2)
	ALU2(DP3)
	ALU2(DP4)
	ALU2(DPH)
	ALU1(F16TO32)
	ALU1(F32TO16)
	ALU1(FBH)
	ALU1(FBL)
	ALU1(FRC)
	ALU2(LINE)
	ALU1(LZD)
	ALU2(MAC)
	ALU2_ACC(MACH)
	ALU3(MAD)
	ALU1(MOV)
	ALU2(MUL)
	ALU1(NOT)
	ALU2(OR)
	ALU2(PLN)
	ALU1(RNDD)
	ALU1(RNDE)
	ALU1(RNDU)
	ALU1(RNDZ)
	ALU2(SAD2)
	ALU2_ACC(SADA2)
	ALU2(SEL)
	ALU2(SHL)
	ALU2(SHR)
	ALU2_ACC(SUBB)
	ALU2(XOR)

	#undef ALU1
	#undef ALU2
	#undef ALU2_ACC
	#undef ALU3
	/** @} */

	/**
	 * CMP: Sets the low bit of the destination channels with the result
	 * of the comparison, while the upper bits are undefined, and updates
	 * the flag register with the packed 16 bits of the result.
	 *
	 * The destination is retyped to the type of \p src0 and both sources
	 * go through fix_unsigned_negate() before the instruction is emitted.
	 */
	instruction *
	CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
	    brw_conditional_mod condition) const
	{
	   /* Consider
	    *
	    *    CMP null<d> src0<f> src1<f>
	    *
	    * Original gen4 converts to the destination type before comparing,
	    * which yields garbage for floating point comparisons.  On newer
	    * generations the destination type doesn't matter, so matching it
	    * to src0 is always fine and lets the instruction be compacted.
	    */
	   instruction *const cmp = emit(BRW_OPCODE_CMP,
	                                 retype(dst, src0.type),
	                                 fix_unsigned_negate(src0),
	                                 fix_unsigned_negate(src1));

	   return set_condmod(condition, cmp);
	}

	/**
	 * Gen4 predicated IF: emits an IF instruction and attaches
	 * \p predicate to it.
	 */
	instruction *
	IF(brw_predicate predicate) const
	{
	   instruction *const branch = emit(BRW_OPCODE_IF);

	   return set_predicate(predicate, branch);
	}

	/**
	 * Gen6 IF with embedded comparison.
	 *
	 * Emits an IF with a null signed-integer destination that compares
	 * \p src0 against \p src1 using \p condition.  Asserts that the device
	 * is gen6; both sources go through fix_unsigned_negate().
	 */
	instruction *
	IF(const src_reg &src0, const src_reg &src1,
	   brw_conditional_mod condition) const
	{
	   assert(shader->devinfo->gen == 6);

	   instruction *const branch = emit(BRW_OPCODE_IF,
	                                    null_reg_d(),
	                                    fix_unsigned_negate(src0),
	                                    fix_unsigned_negate(src1));

	   return set_condmod(condition, branch);
	}

	/**
	 * Emit a linear interpolation, i.e. x*(1-a) + y*a, into \p dst.
	 *
	 * On gen6 and later a single LRP instruction is used.  Earlier
	 * generations get an equivalent MUL/ADD/MUL/ADD sequence through three
	 * temporaries, and the final ADD is what gets returned.
	 */
	instruction *
	LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
	    const src_reg &a) const
	{
	   if (shader->devinfo->gen < 6) {
	      /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
	      const dst_reg ya = vgrf(dst.type);
	      const dst_reg inv_a = vgrf(dst.type);
	      const dst_reg x_inv_a = vgrf(dst.type);

	      MUL(ya, y, a);
	      ADD(inv_a, negate(a), brw_imm_f(1.0f));
	      MUL(x_inv_a, x, src_reg(inv_a));
	      return ADD(dst, src_reg(x_inv_a), src_reg(ya));
	   }

	   /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
	    * the interpolation factor goes first and x/y trade places.
	    */
	   return emit(BRW_OPCODE_LRP, dst, a, y, x);
	}

	/**
	 * Shader the builder inserts instructions into.  The builder also
	 * reads its register allocator, memory context, device info and
	 * instruction list.
	 */
	backend_shader *shader;

	protected:
	/**
	 * Workaround for negation of UD registers.  See comment in
	 * fs_generator::generate_code() for the details.
	 *
	 * A negated UD source is first copied with a MOV into a fresh UD
	 * temporary, which is returned in its place; any other source is
	 * returned untouched.
	 */
	src_reg
	fix_unsigned_negate(const src_reg &src) const
	{
	   if (src.type != BRW_REGISTER_TYPE_UD || !src.negate)
	      return src;

	   dst_reg resolved = vgrf(BRW_REGISTER_TYPE_UD);
	   MOV(resolved, src);
	   return src_reg(resolved);
	}

	/**
	 * Workaround for register access modes not supported by the ternary
	 * instruction encoding.
	 *
	 * Immediates, and uniforms whose swizzle is not a single value, are
	 * expanded into a temporary with VEC4_OPCODE_UNPACK_UNIFORM; the
	 * temporary is returned in their place.  Everything else is returned
	 * as is.
	 */
	src_reg
	fix_3src_operand(const src_reg &src) const
	{
	   /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like
	    * to be able to use vertical stride of zero to replicate the vec4
	    * uniform, like
	    *
	    *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
	    *
	    * But you can't, since vertical stride is always four in
	    * three-source instructions. Instead, insert a MOV instruction to
	    * do the replication so that the three-source instruction can
	    * consume it.
	    */
	   const bool is_uniform = (src.file == UNIFORM);
	   const bool is_immediate = (src.file == IMM);

	   /* Only uniforms and immediates ever need the replication. */
	   if (!is_uniform && !is_immediate)
	      return src;

	   if (is_uniform && brw_is_single_value_swizzle(src.swizzle))
	      return src;

	   const dst_reg replicated = vgrf(src.type);
	   emit(VEC4_OPCODE_UNPACK_UNIFORM, replicated, src);
	   return src_reg(replicated);
	}

	/**
	 * Workaround for register access modes not supported by the math
	 * instruction.
	 *
	 * Returns either \p src itself or a temporary that \p src has been
	 * copied into with a MOV, depending on the hardware generation.
	 */
	src_reg
	fix_math_operand(const src_reg &src) const
	{
	   /* The gen6 math instruction ignores the source modifiers --
	    * swizzle, abs, negate, and at least some parts of the register
	    * region description.
	    *
	    * Rather than trying to enumerate all these cases, *always* expand the
	    * operand to a temp GRF for gen6.
	    *
	    * For gen7, keep the operand as-is, except if immediate, which gen7 still
	    * can't use.
	    */
	   if (shader->devinfo->gen == 6 ||
	       (shader->devinfo->gen == 7 && src.file == IMM)) {
	      const dst_reg tmp = vgrf(src.type);
	      MOV(tmp, src);
	      return src_reg(tmp);
	   } else {
	      return src;
	   }
	}

	/**
	 * Workaround other weirdness of the math instruction.
	 *
	 * - Before gen6: base_mrf is set to 1 and mlen to the number of
	 *   sources (one when src[1] is BAD_FILE, two otherwise).
	 * - On gen6 with a partial destination writemask: the math instruction
	 *   is redirected to a fresh temporary and a MOV from that temporary
	 *   to the original destination is emitted.
	 *
	 * \return \p inst.
	 */
	instruction *
	fix_math_instruction(instruction *inst) const
	{
	   if (shader->devinfo->gen < 6) {
	      inst->base_mrf = 1;
	      inst->mlen = (inst->src[1].file == BAD_FILE ? 1 : 2);

	   } else if (shader->devinfo->gen == 6 &&
	              inst->dst.writemask != WRITEMASK_XYZW) {
	      const dst_reg scratch = vgrf(inst->dst.type);

	      /* Copy out to the real destination, then retarget the math. */
	      MOV(inst->dst, src_reg(scratch));
	      inst->dst = scratch;
	   }

	   return inst;
	}

	/** Basic block instructions are inserted into, or NULL when the
	 *  builder inserts relative to the cursor alone. */
	bblock_t *block;
	/** List node that new instructions are inserted in front of. */
	exec_node *cursor;

	/** Value copied into force_writemask_all of every emitted
	 *  instruction. */
	bool force_writemask_all;

	/** Debug annotation info copied onto every emitted instruction. */
	struct {
	const char *str;
	const void *ir;
	} annotation;
	};
	}

	#endif