src/compiler/glsl/lower_blend_equation_advanced.cpp - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2016 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */

 #include "ir.h"
 #include "ir_builder.h"
 #include "ir_optimization.h"
 #include "ir_hierarchical_visitor.h"
 #include "program/prog_instruction.h"
 #include "program/prog_statevars.h"
 #include "util/bitscan.h"

 using namespace ir_builder;

 /* Shorthand for building float ir_constant immediates.
  *
  * Both macros allocate out of a variable named mem_ctx, which therefore
  * has to be in scope wherever they are used.  The second constructor
  * argument (1 or 3) is presumably the component count, i.e. imm1 yields a
  * scalar and imm3 a vec3 with every component set to x -- confirm against
  * the ir_constant constructor.
  */
 #define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
 #define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)

 static ir_rvalue *
 blend_multiply(ir_variable *src, ir_variable *dst)
 {
    /* Multiply: the component-wise product of source and destination,
     * f(Cs,Cd) = Cs*Cd.
     */
    ir_rvalue *product = mul(src, dst);
    return product;
 }

 static ir_rvalue *
 blend_screen(ir_variable *src, ir_variable *dst)
 {
    /* Screen: f(Cs,Cd) = (Cs+Cd) - (Cs*Cd) */
    ir_rvalue *sum = add(src, dst);
    ir_rvalue *product = mul(src, dst);
    return sub(sum, product);
 }

 static ir_rvalue *
 blend_overlay(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Overlay selects per component on the destination:
     *
     *    f(Cs,Cd) = 2*Cs*Cd              if Cd <= 0.5
     *               1-2*(1-Cs)*(1-Cd)    otherwise
     */

    /* 2*Cs*Cd */
    ir_rvalue *dark = mul(imm3(2), mul(src, dst));

    /* 1-2*(1-Cs)*(1-Cd) */
    ir_rvalue *inv_src = sub(imm3(1), src);
    ir_rvalue *inv_dst = sub(imm3(1), dst);
    ir_rvalue *light = sub(imm3(1), mul(imm3(2), mul(inv_src, inv_dst)));

    ir_rvalue *dst_is_dark = lequal(dst, imm3(0.5f));
    return csel(dst_is_dark, dark, light);
 }

 static ir_rvalue *
 blend_darken(ir_variable *src, ir_variable *dst)
 {
    /* Darken keeps the smaller of the two colors: f(Cs,Cd) = min(Cs,Cd) */
    ir_rvalue *darker = min2(src, dst);
    return darker;
 }

 static ir_rvalue *
 blend_lighten(ir_variable *src, ir_variable *dst)
 {
    /* Lighten keeps the larger of the two colors: f(Cs,Cd) = max(Cs,Cd) */
    ir_rvalue *lighter = max2(src, dst);
    return lighter;
 }

 static ir_rvalue *
 blend_colordodge(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Color dodge:
     *
     *    f(Cs,Cd) = 0                   if Cd <= 0
     *               min(1,Cd/(1-Cs))    if Cd > 0 and Cs < 1
     *               1                   if Cd > 0 and Cs >= 1
     *
     * The selects are built innermost first.
     */

    /* min(1,Cd/(1-Cs)) */
    ir_rvalue *dodged = min2(imm3(1), div(dst, sub(imm3(1), src)));

    /* Cd > 0: a source of 1 or more saturates to 1. */
    ir_rvalue *positive_dst = csel(gequal(src, imm3(1)), imm3(1), dodged);

    /* Cd <= 0 always yields 0. */
    return csel(lequal(dst, imm3(0)), imm3(0), positive_dst);
 }

 static ir_rvalue *
 blend_colorburn(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Color burn:
     *
     *    f(Cs,Cd) = 1                       if Cd >= 1
     *               1 - min(1,(1-Cd)/Cs)    if Cd < 1 and Cs > 0
     *               0                       if Cd < 1 and Cs <= 0
     *
     * The selects are built innermost first.
     */

    /* 1 - min(1,(1-Cd)/Cs) */
    ir_rvalue *ratio = div(sub(imm3(1), dst), src);
    ir_rvalue *burned = sub(imm3(1), min2(imm3(1), ratio));

    /* Cd < 1: a source of 0 or less yields 0. */
    ir_rvalue *below_one = csel(lequal(src, imm3(0)), imm3(0), burned);

    /* Cd >= 1 always yields 1. */
    return csel(gequal(dst, imm3(1)), imm3(1), below_one);
 }

 static ir_rvalue *
 blend_hardlight(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Hard light uses the same two rules as overlay, but selects per
     * component on the source instead of the destination:
     *
     *    f(Cs,Cd) = 2*Cs*Cd              if Cs <= 0.5
     *               1-2*(1-Cs)*(1-Cd)    otherwise
     */

    /* 2*Cs*Cd */
    ir_rvalue *dark = mul(imm3(2), mul(src, dst));

    /* 1-2*(1-Cs)*(1-Cd) */
    ir_rvalue *inv_src = sub(imm3(1), src);
    ir_rvalue *inv_dst = sub(imm3(1), dst);
    ir_rvalue *light = sub(imm3(1), mul(imm3(2), mul(inv_src, inv_dst)));

    ir_rvalue *src_is_dark = lequal(src, imm3(0.5f));
    return csel(src_is_dark, dark, light);
 }

 static ir_rvalue *
 blend_softlight(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Soft light:
     *
     *    f(Cs,Cd) = Cd-(1-2*Cs)*Cd*(1-Cd)               if Cs <= 0.5
     *               Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3)    if Cs > 0.5 and
     *                                                      Cd <= 0.25
     *               Cd+(2*Cs-1)*(sqrt(Cd)-Cd)           if Cs > 0.5 and
     *                                                      Cd > 0.25
     *
     * All three cases share the form
     *
     *    f(Cs,Cd) = Cd + (2*Cs-1) * g(Cs,Cd)
     *
     * with
     *
     *    g(Cs,Cd) = Cd*(1-Cd)               if Cs <= 0.5
     *               Cd*((16*Cd-12)*Cd+3)    if Cs > 0.5 and Cd <= 0.25
     *               sqrt(Cd)-Cd             otherwise
     */

    /* Cd*(1-Cd) */
    ir_rvalue *g_dark_src = mul(dst, sub(imm3(1), dst));

    /* Cd*((16*Cd-12)*Cd+3) */
    ir_rvalue *poly =
       add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3));
    ir_rvalue *g_dark_dst = mul(dst, poly);

    /* sqrt(Cd)-Cd */
    ir_rvalue *g_light_dst = sub(sqrt(dst), dst);

    /* Pick g(): first on Cd within the Cs > 0.5 cases, then on Cs. */
    ir_rvalue *g_light_src =
       csel(lequal(dst, imm3(0.25f)), g_dark_dst, g_light_dst);
    ir_rvalue *g = csel(lequal(src, imm3(0.5f)), g_dark_src, g_light_src);

    /* Cd + (2*Cs-1)*g */
    ir_rvalue *scale = sub(mul(imm3(2), src), imm3(1));
    return add(dst, mul(scale, g));
 }

 static ir_rvalue *
 blend_difference(ir_variable *src, ir_variable *dst)
 {
    /* Difference: f(Cs,Cd) = abs(Cd-Cs) */
    ir_rvalue *delta = sub(dst, src);
    return abs(delta);
 }

 static ir_rvalue *
 blend_exclusion(ir_variable *src, ir_variable *dst)
 {
    void *mem_ctx = ralloc_parent(src);

    /* Exclusion: f(Cs,Cd) = Cs + (Cd - 2*Cs*Cd) */
    ir_rvalue *twice_product = mul(imm3(2), mul(src, dst));
    return add(src, sub(dst, twice_product));
 }

 /* Build an expression for the smallest of v's x, y and z components. */
 static ir_rvalue *
 minv3(ir_variable *v)
 {
    ir_rvalue *min_xy = min2(swizzle_x(v), swizzle_y(v));
    return min2(min_xy, swizzle_z(v));
 }

 /* Build an expression for the largest of v's x, y and z components. */
 static ir_rvalue *
 maxv3(ir_variable *v)
 {
    ir_rvalue *max_xy = max2(swizzle_x(v), swizzle_y(v));
    return max2(max_xy, swizzle_z(v));
 }

 /* Build an expression for the luminosity of the RGB color c:
  * dot(c, vec3(0.30, 0.59, 0.11)).
  */
 static ir_rvalue *
 lumv3(ir_variable *c)
 {
    void *mem_ctx = ralloc_parent(c);

    /* Per-channel weights for red, green and blue. */
    ir_constant_data weights;
    weights.f[0] = 0.30;
    weights.f[1] = 0.59;
    weights.f[2] = 0.11;

    ir_constant *weight_vec =
       new(mem_ctx) ir_constant(glsl_type::vec3_type, &weights);

    return dot(c, weight_vec);
 }

 /* Build an expression for the saturation of the RGB color c: the
  * difference between its largest and smallest components.
  */
 static ir_rvalue *
 satv3(ir_variable *c)
 {
    ir_rvalue *largest = maxv3(c);
    ir_rvalue *smallest = minv3(c);
    return sub(largest, smallest);
 }

 /* Emit code that stores into <color> the RGB color <cbase> with its
  * luminosity replaced by that of the RGB color <clum>.
  *
  * The equations are the ones from the ES 3.2 (June 15th, 2016)
  * specification.  Revision 16 of GL_KHR_blend_equation_advanced and
  * revision 9 of GL_NV_blend_equation_advanced specify a different set
  * of equations.  Older revisions match ES 3.2's text, and dEQP expects
  * the ES 3.2 rules implemented here.
  *
  * All instructions are appended through the factory <f>.
  */
 static void
 set_lum(ir_factory *f,
         ir_variable *color,
         ir_variable *cbase,
         ir_variable *clum)
 {
    void *mem_ctx = f->mem_ctx;

    /* color = cbase + (lum(clum) - lum(cbase)) */
    ir_rvalue *lum_delta = sub(lumv3(clum), lumv3(cbase));
    f->emit(assign(color, add(cbase, lum_delta)));

    ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
    ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
    ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");

    /* Snapshot luminosity and component range of the shifted color. */
    f->emit(assign(llum, lumv3(color)));
    f->emit(assign(mincol, minv3(color)));
    f->emit(assign(maxcol, maxv3(color)));

    /* maxcol > 1:
     *    color = llum + ((color-llum)*(1-llum)) / (maxcol-llum)
     */
    ir_rvalue *scaled_high = mul(sub(color, llum), sub(imm3(1), llum));
    ir_instruction *fix_high =
       if_tree(greater(maxcol, imm1(1)),
               assign(color, add(llum, div(scaled_high,
                                           sub(maxcol, llum)))));

    /* mincol < 0:
     *    color = llum + ((color-llum)*llum) / (llum-mincol)
     */
    ir_rvalue *scaled_low = mul(sub(color, llum), llum);
    ir_instruction *fix_low =
       assign(color, add(llum, div(scaled_low, sub(llum, mincol))));

    /* The upper-range fix is only considered when the lower one is not
     * taken.
     */
    f->emit(if_tree(less(mincol, imm1(0)), fix_low, fix_high));
 }

 /* Emit code that stores into <color> the RGB color <cbase> with its
  * saturation replaced by that of the RGB color <csat>, and then
  * overrides the luminosity of that result with that of the RGB color
  * <clum>.
  *
  * All instructions are appended through the factory <f>.
  */
 static void
 set_lum_sat(ir_factory *f,
             ir_variable *color,
             ir_variable *cbase,
             ir_variable *csat,
             ir_variable *clum)
 {
    void *mem_ctx = f->mem_ctx;

    ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
    f->emit(assign(sbase, satv3(cbase)));

    /* (cbase - min(cbase)) * sat(csat) / sat(cbase)
     *
     * Up to rounding this moves the smallest (R,G,B) component to 0 and
     * the largest to sat(csat), with the "middle" component interpolated
     * according to where it originally sat between the two.
     */
    ir_rvalue *rescaled =
       div(mul(sub(cbase, minv3(cbase)), satv3(csat)), sbase);

    ir_instruction *when_saturated = assign(color, rescaled);
    ir_instruction *when_flat = assign(color, imm3(0));

    /* A base color with no saturation would divide by zero; use 0. */
    f->emit(if_tree(greater(sbase, imm1(0)), when_saturated, when_flat));

    set_lum(f, color, color, clum);
 }

 /* Build the comparison `mode == q`, with q as an unsigned constant. */
 static ir_rvalue *
 is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
 {
    void *mem_ctx = ralloc_parent(mode);
    ir_constant *wanted = new(mem_ctx) ir_constant(unsigned(q));

    return equal(mode, wanted);
 }

 /**
  * Emit the code that computes the blended color and return the vec4
  * temporary ("__blend_result") that receives it.
  *
  * \param f                 factory to emit into.  It is taken by value, so
  *                          redirecting f.instructions below does not affect
  *                          the caller's factory.
  * \param mode              variable compared against the blend mode
  *                          constants at run time.
  * \param fb                variable holding the destination color.
  * \param blend_src         rvalue producing the source color.
  * \param blend_qualifiers  bitfield of blend modes to generate code for.
  *
  * The emitted code has the shape
  *
  *    if (mode == BLEND_NONE)
  *       result = src;
  *    else
  *       result = <advanced blend of src and fb>;
  */
 static ir_variable *
 calc_blend_result(ir_factory f,
                   ir_variable *mode,
                   ir_variable *fb,
                   ir_rvalue *blend_src,
                   GLbitfield blend_qualifiers)
 {
    void *mem_ctx = f.mem_ctx;
    ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");

    /* Save blend_src to a temporary so we can reference it multiple times. */
    ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
    f.emit(assign(src, blend_src));

    /* If we're not doing advanced blending, just write the original value. */
    ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
    f.emit(if_blending);
    if_blending->then_instructions.push_tail(assign(result, src));

    /* Everything emitted through f from here on lands in the else branch. */
    f.instructions = &if_blending->else_instructions;

    /* (Rs', Gs', Bs') =
     *   (0, 0, 0),              if As == 0
     *   (Rs/As, Gs/As, Bs/As),  otherwise
     */
    ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
    ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");

    /* (Rd', Gd', Bd') =
     *   (0, 0, 0),              if Ad == 0
     *   (Rd/Ad, Gd/Ad, Bd/Ad),  otherwise
     */
    ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
    ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");

    /* Components that are equal to alpha are set to exactly 1 instead of
     * going through the division.
     */
    f.emit(assign(dst_alpha, swizzle_w(fb)));
    f.emit(if_tree(equal(dst_alpha, imm1(0)),
                      assign(dst_rgb, imm3(0)),
                      assign(dst_rgb, csel(equal(swizzle_xyz(fb),
                                                 swizzle(fb, SWIZZLE_WWWW, 3)),
                                           imm3(1),
                                           div(swizzle_xyz(fb), dst_alpha)))));

    /* Same treatment for the source color. */
    f.emit(assign(src_alpha, swizzle_w(src)));
    f.emit(if_tree(equal(src_alpha, imm1(0)),
                      assign(src_rgb, imm3(0)),
                      assign(src_rgb, csel(equal(swizzle_xyz(src),
                                                 swizzle(src, SWIZZLE_WWWW, 3)),
                                           imm3(1),
                                           div(swizzle_xyz(src), src_alpha)))));

    ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");

    /* Separate copy of the factory for the per-mode chain, so that f keeps
     * pointing at the else branch of if_blending.
     */
    ir_factory casefactory = f;

    /* Emit one `if (mode == choice)` per bit set in blend_qualifiers.  Each
     * new if is placed in the else list of the previous one (see the end of
     * the loop body), so the tests form an if / else-if chain.
     * NOTE(review): u_bit_scan presumably returns the index of the lowest
     * set bit and clears it from choices -- confirm in util/bitscan.h.
     */
    unsigned choices = blend_qualifiers;
    while (choices) {
       enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)
          (1u << u_bit_scan(&choices));

       ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
       casefactory.emit(iff);
       casefactory.instructions = &iff->then_instructions;

       /* Modes that reduce to a single expression return it in val; the HSL
        * modes emit their own instructions that write factor and leave val
        * NULL.
        */
       ir_rvalue *val = NULL;

       switch (choice) {
       case BLEND_MULTIPLY:
          val = blend_multiply(src_rgb, dst_rgb);
          break;
       case BLEND_SCREEN:
          val = blend_screen(src_rgb, dst_rgb);
          break;
       case BLEND_OVERLAY:
          val = blend_overlay(src_rgb, dst_rgb);
          break;
       case BLEND_DARKEN:
          val = blend_darken(src_rgb, dst_rgb);
          break;
       case BLEND_LIGHTEN:
          val = blend_lighten(src_rgb, dst_rgb);
          break;
       case BLEND_COLORDODGE:
          val = blend_colordodge(src_rgb, dst_rgb);
          break;
       case BLEND_COLORBURN:
          val = blend_colorburn(src_rgb, dst_rgb);
          break;
       case BLEND_HARDLIGHT:
          val = blend_hardlight(src_rgb, dst_rgb);
          break;
       case BLEND_SOFTLIGHT:
          val = blend_softlight(src_rgb, dst_rgb);
          break;
       case BLEND_DIFFERENCE:
          val = blend_difference(src_rgb, dst_rgb);
          break;
       case BLEND_EXCLUSION:
          val = blend_exclusion(src_rgb, dst_rgb);
          break;
       case BLEND_HSL_HUE:
          set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
          break;
       case BLEND_HSL_SATURATION:
          set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
          break;
       case BLEND_HSL_COLOR:
          set_lum(&casefactory, factor, src_rgb, dst_rgb);
          break;
       case BLEND_HSL_LUMINOSITY:
          set_lum(&casefactory, factor, dst_rgb, src_rgb);
          break;
       case BLEND_NONE:
       case BLEND_ALL:
          unreachable("not real cases");
       }

       if (val)
          casefactory.emit(assign(factor, val));

       /* The next mode's test goes into this if's else list. */
       casefactory.instructions = &iff->else_instructions;
    }

    /* p0(As,Ad) = As*Ad
     * p1(As,Ad) = As*(1-Ad)
     * p2(As,Ad) = Ad*(1-As)
     */
    ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
    ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
    ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");

    f.emit(assign(p0, mul(src_alpha, dst_alpha)));
    f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
    f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));

    /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
     * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
     * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
     * A =          X*p0(As,Ad) +     Y*p1(As,Ad) +     Z*p2(As,Ad)
     *
     * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
     *
     * In vector form, this is:
     * RGB = factor * p0 + Cs * p1 + Cd * p2
     *   A = p0 + p1 + p2
     */
    f.emit(assign(result,
                  add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
                  WRITEMASK_XYZ));
    f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));

    return result;
 }

 /**
  * Build a dereference of var.  If var is an array, the dereference is of
  * its first element, var[0].
  */
 static ir_dereference *
 deref_output(ir_variable *var)
 {
    void *mem_ctx = ralloc_parent(var);
    ir_dereference *whole = new(mem_ctx) ir_dereference_variable(var);

    /* Non-arrays are dereferenced directly. */
    if (!whole->type->is_array())
       return whole;

    ir_constant *first = new(mem_ctx) ir_constant(0);
    return new(mem_ctx) ir_dereference_array(whole, first);
 }

 /**
  * Locate the parameterless signature of main() in the shader's IR.
  *
  * _mesa_get_main_function_signature() can't be used here because there
  * is no symbol table at this point, so the instruction list is searched
  * by hand.  Asserts that a signature was found.
  */
 static ir_function_signature *
 get_main(gl_linked_shader *sh)
 {
    /* Find the first function named "main". */
    ir_function *main_fn = NULL;
    foreach_in_list(ir_instruction, ir, sh->ir) {
       ir_function *candidate = ir->as_function();
       if (candidate && strcmp(candidate->name, "main") == 0) {
          main_fn = candidate;
          break;
       }
    }

    /* Match it against an empty parameter list. */
    ir_function_signature *sig = NULL;
    if (main_fn) {
       exec_list void_parameters;
       sig = main_fn->matching_signature(NULL, &void_parameters, false);
    }

    assert(sig != NULL); /* main() must exist */
    return sig;
 }

 /**
  * Lower advanced blend equations into code at the end of main().
  *
  * When the shader declares blend support, this pass:
  *  - lowers early returns in main() so there is a single exit point,
  *  - adds a hidden framebuffer-fetch variable and a hidden uniform holding
  *    the current blend mode,
  *  - gathers the shader's outputs for render target 0 into one RGBA blend
  *    source,
  *  - emits the blend computation (see calc_blend_result), and
  *  - copies the blended result back into the original outputs.
  *
  * \return false, without touching the IR, if BlendSupport is 0; true
  *         otherwise.
  */
 bool
 lower_blend_equation_advanced(struct gl_linked_shader *sh)
 {
    if (sh->Program->sh.fs.BlendSupport == 0)
       return false;

    /* Lower early returns in main() so there's a single exit point
     * where we can insert our lowering code.
     */
    do_lower_jumps(sh->ir, false, false, true, false, false);

    void *mem_ctx = ralloc_parent(sh->ir);

    /* Hidden, read-only variable used to read the destination color. */
    ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
                                               "__blend_fb_fetch",
                                               ir_var_shader_out);
    fb->data.location = FRAG_RESULT_DATA0;
    fb->data.read_only = 1;
    fb->data.fb_fetch_output = 1;
    fb->data.how_declared = ir_var_hidden;

    /* Hidden uniform bound to the internal advanced-blending-mode state. */
    ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                 "gl_AdvancedBlendModeMESA",
                                                 ir_var_uniform);
    mode->data.how_declared = ir_var_hidden;
    mode->allocate_state_slots(1);
    ir_state_slot *slot0 = &mode->get_state_slots()[0];
    slot0->swizzle = SWIZZLE_XXXX;
    slot0->tokens[0] = STATE_INTERNAL;
    slot0->tokens[1] = STATE_ADVANCED_BLENDING_MODE;
    for (int i = 2; i < STATE_LENGTH; i++)
       slot0->tokens[i] = 0;

    sh->ir->push_head(fb);
    sh->ir->push_head(mode);

    /* Gather any output variables referring to render target 0.
     *
     * ARB_enhanced_layouts irritatingly allows the shader to specify
     * multiple output variables for the same render target, each of
     * which writes a subset of the components, starting at location_frac.
     * The variables can't overlap, thankfully.
     */
    ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
    foreach_in_list(ir_instruction, ir, sh->ir) {
       ir_variable *var = ir->as_variable();

       /* Skip non-outputs, and also our own framebuffer-fetch variable.
        * It was just pushed onto this list as a shader_out at
        * FRAG_RESULT_DATA0, so without this check it would be recorded as
        * the writer of all four components.  Components the shader never
        * writes would then be read from, and copied back to, the
        * read-only fetch variable instead of getting the <0, 0, 0, 1>
        * default below.
        */
       if (!var || var == fb || var->data.mode != ir_var_shader_out)
          continue;

       if (var->data.location == FRAG_RESULT_DATA0 ||
           var->data.location == FRAG_RESULT_COLOR) {
          const int components = var->type->without_array()->vector_elements;

          for (int i = 0; i < components; i++) {
             outputs[var->data.location_frac + i] = var;
          }
       }
    }

    /* Combine values written to outputs into a single RGBA blend source.
     * We assign <0, 0, 0, 1> to any components with no corresponding output.
     */
    ir_rvalue *blend_source;
    if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
       /* A single vec4 output covers everything; use it directly. */
       blend_source = deref_output(outputs[0]);
    } else {
       ir_rvalue *blend_comps[4];
       for (int i = 0; i < 4; i++) {
          ir_variable *var = outputs[i];
          if (var) {
             /* Component i of the render target is component
              * (i - location_frac) of the variable that writes it.
              */
             blend_comps[i] = swizzle(deref_output(outputs[i]),
                                      i - outputs[i]->data.location_frac, 1);
          } else {
             blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
          }
       }

       blend_source =
          new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
                                     blend_comps[0], blend_comps[1],
                                     blend_comps[2], blend_comps[3]);
    }

    ir_function_signature *main = get_main(sh);
    ir_factory f(&main->body, mem_ctx);

    ir_variable *result_dest =
       calc_blend_result(f, mode, fb, blend_source,
                         sh->Program->sh.fs.BlendSupport);

    /* Copy the result back to the original values.  It would be simpler
     * to demote the program's output variables, and create a new vec4
     * output for our result, but this pass runs before we create the
     * ARB_program_interface_query resource list.  So we have to leave
     * the original outputs in place and use them.
     */
    for (int i = 0; i < 4; i++) {
       if (!outputs[i])
          continue;

       f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
                     1 << i));
    }

    validate_ir_tree(sh->ir);
    return true;
 }
	/*
	* Copyright © 2016 Intel Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	* DEALINGS IN THE SOFTWARE.
	*/

	#include "ir.h"
	#include "ir_builder.h"
	#include "ir_optimization.h"
	#include "ir_hierarchical_visitor.h"
	#include "program/prog_instruction.h"
	#include "program/prog_statevars.h"
	#include "util/bitscan.h"

	using namespace ir_builder;

	/* Shorthand for building float ir_constant immediates.
	 *
	 * Both macros allocate out of a variable named mem_ctx, which therefore
	 * has to be in scope wherever they are used.  The second constructor
	 * argument (1 or 3) is presumably the component count, i.e. imm1 yields a
	 * scalar and imm3 a vec3 with every component set to x -- confirm against
	 * the ir_constant constructor.
	 */
	#define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
	#define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)

	/* Multiply blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_multiply(ir_variable *src, ir_variable *dst)
	{
	   /* f(Cs,Cd) = Cs*Cd */
	   return mul(src, dst);
	}

	/* Screen blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_screen(ir_variable *src, ir_variable *dst)
	{
	   /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
	   return sub(add(src, dst), mul(src, dst));
	}

	/* Overlay blend; the rule is selected per component on the destination. */
	static ir_rvalue *
	blend_overlay(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
	    *            1-2*(1-Cs)*(1-Cd), otherwise
	    */
	   ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
	   ir_rvalue *rule_2 =
	      sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
	   return csel(lequal(dst, imm3(0.5f)), rule_1, rule_2);
	}

	/* Darken blend: keeps the smaller of the two colors per component. */
	static ir_rvalue *
	blend_darken(ir_variable *src, ir_variable *dst)
	{
	   /* f(Cs,Cd) = min(Cs,Cd) */
	   return min2(src, dst);
	}

	/* Lighten blend: keeps the larger of the two colors per component. */
	static ir_rvalue *
	blend_lighten(ir_variable *src, ir_variable *dst)
	{
	   /* f(Cs,Cd) = max(Cs,Cd) */
	   return max2(src, dst);
	}

	/* Color dodge blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_colordodge(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) =
	    *   0, if Cd <= 0
	    *   min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
	    *   1, if Cd > 0 and Cs >= 1
	    */
	   return csel(lequal(dst, imm3(0)), imm3(0),
	               csel(gequal(src, imm3(1)), imm3(1),
	                    min2(imm3(1), div(dst, sub(imm3(1), src)))));
	}

	/* Color burn blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_colorburn(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) =
	    *   1, if Cd >= 1
	    *   1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
	    *   0, if Cd < 1 and Cs <= 0
	    */
	   return csel(gequal(dst, imm3(1)), imm3(1),
	               csel(lequal(src, imm3(0)), imm3(0),
	                    sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst), src)))));
	}

	/* Hard light blend; the rule is selected per component on the source. */
	static ir_rvalue *
	blend_hardlight(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
	    *            1-2*(1-Cs)*(1-Cd), otherwise
	    */
	   ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
	   ir_rvalue *rule_2 =
	      sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
	   return csel(lequal(src, imm3(0.5f)), rule_1, rule_2);
	}

	/* Soft light blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_softlight(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) =
	    *   Cd-(1-2*Cs)*Cd*(1-Cd),
	    *     if Cs <= 0.5
	    *   Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
	    *     if Cs > 0.5 and Cd <= 0.25
	    *   Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
	    *     if Cs > 0.5 and Cd > 0.25
	    *
	    * We can simplify this to
	    *
	    * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
	    * g(Cs,Cd) = Cd*(1-Cd)            if Cs <= 0.5
	    *            Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
	    *            sqrt(Cd)-Cd,         otherwise
	    */
	   ir_rvalue *factor_1 = mul(dst, sub(imm3(1), dst));
	   ir_rvalue *factor_2 =
	      mul(dst, add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3)));
	   ir_rvalue *factor_3 = sub(sqrt(dst), dst);
	   ir_rvalue *factor = csel(lequal(src, imm3(0.5f)), factor_1,
	                            csel(lequal(dst, imm3(0.25f)),
	                                 factor_2, factor_3));
	   return add(dst, mul(sub(mul(imm3(2), src), imm3(1)), factor));
	}

	/* Difference blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_difference(ir_variable *src, ir_variable *dst)
	{
	   /* f(Cs,Cd) = abs(Cd-Cs) */
	   return abs(sub(dst, src));
	}

	/* Exclusion blend of two vec3 colors; returns the expression tree. */
	static ir_rvalue *
	blend_exclusion(ir_variable *src, ir_variable *dst)
	{
	   void *mem_ctx = ralloc_parent(src);

	   /* f(Cs,Cd) = Cs+Cd-2*Cs*Cd */
	   return add(src, sub(dst, mul(imm3(2), mul(src, dst))));
	}

	/* Build an expression for the smallest of v's x, y and z components. */
	static ir_rvalue *
	minv3(ir_variable *v)
	{
	   ir_rvalue *min_xy = min2(swizzle_x(v), swizzle_y(v));
	   return min2(min_xy, swizzle_z(v));
	}

	/* Build an expression for the largest of v's x, y and z components. */
	static ir_rvalue *
	maxv3(ir_variable *v)
	{
	   ir_rvalue *max_xy = max2(swizzle_x(v), swizzle_y(v));
	   return max2(max_xy, swizzle_z(v));
	}

	/* Build an expression for the luminosity of the RGB color c:
	 * dot(c, vec3(0.30, 0.59, 0.11)).
	 */
	static ir_rvalue *
	lumv3(ir_variable *c)
	{
	   void *mem_ctx = ralloc_parent(c);

	   /* Per-channel weights for red, green and blue. */
	   ir_constant_data weights;
	   weights.f[0] = 0.30;
	   weights.f[1] = 0.59;
	   weights.f[2] = 0.11;

	   ir_constant *weight_vec =
	      new(mem_ctx) ir_constant(glsl_type::vec3_type, &weights);

	   return dot(c, weight_vec);
	}

	/* Build an expression for the saturation of the RGB color c: the
	 * difference between its largest and smallest components.
	 */
	static ir_rvalue *
	satv3(ir_variable *c)
	{
	   ir_rvalue *largest = maxv3(c);
	   ir_rvalue *smallest = minv3(c);
	   return sub(largest, smallest);
	}

	/* Emit code that stores into <color> the RGB color <cbase> with its
	 * luminosity replaced by that of the RGB color <clum>.
	 *
	 * The equations are the ones from the ES 3.2 (June 15th, 2016)
	 * specification.  Revision 16 of GL_KHR_blend_equation_advanced and
	 * revision 9 of GL_NV_blend_equation_advanced specify a different set
	 * of equations.  Older revisions match ES 3.2's text, and dEQP expects
	 * the ES 3.2 rules implemented here.
	 *
	 * All instructions are appended through the factory <f>.
	 */
	static void
	set_lum(ir_factory *f,
	        ir_variable *color,
	        ir_variable *cbase,
	        ir_variable *clum)
	{
	   void *mem_ctx = f->mem_ctx;

	   /* color = cbase + (lum(clum) - lum(cbase)) */
	   ir_rvalue *lum_delta = sub(lumv3(clum), lumv3(cbase));
	   f->emit(assign(color, add(cbase, lum_delta)));

	   ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
	   ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
	   ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");

	   /* Snapshot luminosity and component range of the shifted color. */
	   f->emit(assign(llum, lumv3(color)));
	   f->emit(assign(mincol, minv3(color)));
	   f->emit(assign(maxcol, maxv3(color)));

	   /* maxcol > 1:
	    *    color = llum + ((color-llum)*(1-llum)) / (maxcol-llum)
	    */
	   ir_rvalue *scaled_high = mul(sub(color, llum), sub(imm3(1), llum));
	   ir_instruction *fix_high =
	      if_tree(greater(maxcol, imm1(1)),
	              assign(color, add(llum, div(scaled_high,
	                                          sub(maxcol, llum)))));

	   /* mincol < 0:
	    *    color = llum + ((color-llum)*llum) / (llum-mincol)
	    */
	   ir_rvalue *scaled_low = mul(sub(color, llum), llum);
	   ir_instruction *fix_low =
	      assign(color, add(llum, div(scaled_low, sub(llum, mincol))));

	   /* The upper-range fix is only considered when the lower one is not
	    * taken.
	    */
	   f->emit(if_tree(less(mincol, imm1(0)), fix_low, fix_high));
	}

	/* Emit code that stores into <color> the RGB color <cbase> with its
	 * saturation replaced by that of the RGB color <csat>, and then
	 * overrides the luminosity of that result with that of the RGB color
	 * <clum>.
	 *
	 * All instructions are appended through the factory <f>.
	 */
	static void
	set_lum_sat(ir_factory *f,
	            ir_variable *color,
	            ir_variable *cbase,
	            ir_variable *csat,
	            ir_variable *clum)
	{
	   void *mem_ctx = f->mem_ctx;

	   ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
	   f->emit(assign(sbase, satv3(cbase)));

	   /* (cbase - min(cbase)) * sat(csat) / sat(cbase)
	    *
	    * Up to rounding this moves the smallest (R,G,B) component to 0 and
	    * the largest to sat(csat), with the "middle" component interpolated
	    * according to where it originally sat between the two.
	    */
	   ir_rvalue *rescaled =
	      div(mul(sub(cbase, minv3(cbase)), satv3(csat)), sbase);

	   ir_instruction *when_saturated = assign(color, rescaled);
	   ir_instruction *when_flat = assign(color, imm3(0));

	   /* A base color with no saturation would divide by zero; use 0. */
	   f->emit(if_tree(greater(sbase, imm1(0)), when_saturated, when_flat));

	   set_lum(f, color, color, clum);
	}

	/* Build the comparison `mode == q`, with q as an unsigned constant. */
	static ir_rvalue *
	is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
	{
	   void *mem_ctx = ralloc_parent(mode);
	   ir_constant *wanted = new(mem_ctx) ir_constant(unsigned(q));

	   return equal(mode, wanted);
	}

	/**
	 * Emit the code that computes the blended color and return the vec4
	 * temporary ("__blend_result") that receives it.
	 *
	 * f is taken by value, so redirecting f.instructions below does not
	 * affect the caller's factory.  The emitted code has the shape
	 *
	 *    if (mode == BLEND_NONE)
	 *       result = src;
	 *    else
	 *       result = <advanced blend of src and fb>;
	 */
	static ir_variable *
	calc_blend_result(ir_factory f,
	ir_variable *mode,
	ir_variable *fb,
	ir_rvalue *blend_src,
	GLbitfield blend_qualifiers)
	{
	void *mem_ctx = f.mem_ctx;
	ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");

	/* Save blend_src to a temporary so we can reference it multiple times. */
	ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
	f.emit(assign(src, blend_src));

	/* If we're not doing advanced blending, just write the original value. */
	ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
	f.emit(if_blending);
	if_blending->then_instructions.push_tail(assign(result, src));

	/* Everything emitted through f from here on lands in the else branch. */
	f.instructions = &if_blending->else_instructions;

	/* (Rs', Gs', Bs') =
	 *   (0, 0, 0),              if As == 0
	 *   (Rs/As, Gs/As, Bs/As),  otherwise
	 */
	ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
	ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");

	/* (Rd', Gd', Bd') =
	 *   (0, 0, 0),              if Ad == 0
	 *   (Rd/Ad, Gd/Ad, Bd/Ad),  otherwise
	 */
	ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
	ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");

	/* Components that are equal to alpha are set to exactly 1 instead of
	 * going through the division.
	 */
	f.emit(assign(dst_alpha, swizzle_w(fb)));
	f.emit(if_tree(equal(dst_alpha, imm1(0)),
	assign(dst_rgb, imm3(0)),
	assign(dst_rgb, csel(equal(swizzle_xyz(fb),
	swizzle(fb, SWIZZLE_WWWW, 3)),
	imm3(1),
	div(swizzle_xyz(fb), dst_alpha)))));

	/* Same treatment for the source color. */
	f.emit(assign(src_alpha, swizzle_w(src)));
	f.emit(if_tree(equal(src_alpha, imm1(0)),
	assign(src_rgb, imm3(0)),
	assign(src_rgb, csel(equal(swizzle_xyz(src),
	swizzle(src, SWIZZLE_WWWW, 3)),
	imm3(1),
	div(swizzle_xyz(src), src_alpha)))));

	ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");

	/* Separate copy of the factory for the per-mode chain, so that f keeps
	 * pointing at the else branch of if_blending.
	 */
	ir_factory casefactory = f;

	/* Emit one `if (mode == choice)` per bit set in blend_qualifiers.  Each
	 * new if is placed in the else list of the previous one (see the end of
	 * the loop body), so the tests form an if / else-if chain.
	 * NOTE(review): u_bit_scan presumably returns the index of the lowest
	 * set bit and clears it from choices -- confirm in util/bitscan.h.
	 */
	unsigned choices = blend_qualifiers;
	while (choices) {
	enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)
	(1u << u_bit_scan(&choices));

	ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
	casefactory.emit(iff);
	casefactory.instructions = &iff->then_instructions;

	/* Modes that reduce to a single expression return it in val; the HSL
	 * modes emit their own instructions that write factor and leave val
	 * NULL.
	 */
	ir_rvalue *val = NULL;

	switch (choice) {
	case BLEND_MULTIPLY:
	val = blend_multiply(src_rgb, dst_rgb);
	break;
	case BLEND_SCREEN:
	val = blend_screen(src_rgb, dst_rgb);
	break;
	case BLEND_OVERLAY:
	val = blend_overlay(src_rgb, dst_rgb);
	break;
	case BLEND_DARKEN:
	val = blend_darken(src_rgb, dst_rgb);
	break;
	case BLEND_LIGHTEN:
	val = blend_lighten(src_rgb, dst_rgb);
	break;
	case BLEND_COLORDODGE:
	val = blend_colordodge(src_rgb, dst_rgb);
	break;
	case BLEND_COLORBURN:
	val = blend_colorburn(src_rgb, dst_rgb);
	break;
	case BLEND_HARDLIGHT:
	val = blend_hardlight(src_rgb, dst_rgb);
	break;
	case BLEND_SOFTLIGHT:
	val = blend_softlight(src_rgb, dst_rgb);
	break;
	case BLEND_DIFFERENCE:
	val = blend_difference(src_rgb, dst_rgb);
	break;
	case BLEND_EXCLUSION:
	val = blend_exclusion(src_rgb, dst_rgb);
	break;
	case BLEND_HSL_HUE:
	set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
	break;
	case BLEND_HSL_SATURATION:
	set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
	break;
	case BLEND_HSL_COLOR:
	set_lum(&casefactory, factor, src_rgb, dst_rgb);
	break;
	case BLEND_HSL_LUMINOSITY:
	set_lum(&casefactory, factor, dst_rgb, src_rgb);
	break;
	case BLEND_NONE:
	case BLEND_ALL:
	unreachable("not real cases");
	}

	if (val)
	casefactory.emit(assign(factor, val));

	/* The next mode's test goes into this if's else list. */
	casefactory.instructions = &iff->else_instructions;
	}

	/* p0(As,Ad) = As*Ad
	 * p1(As,Ad) = As*(1-Ad)
	 * p2(As,Ad) = Ad*(1-As)
	 */
	ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
	ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
	ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");

	f.emit(assign(p0, mul(src_alpha, dst_alpha)));
	f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
	f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));

	/* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
	 * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
	 * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
	 * A =          X*p0(As,Ad) +     Y*p1(As,Ad) +     Z*p2(As,Ad)
	 *
	 * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
	 *
	 * In vector form, this is:
	 * RGB = factor * p0 + Cs * p1 + Cd * p2
	 *   A = p0 + p1 + p2
	 */
	f.emit(assign(result,
	add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
	WRITEMASK_XYZ));
	f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));

	return result;
	}

	/**
	 * Build a dereference of var.  If var is an array, the dereference is of
	 * its first element, var[0].
	 */
	static ir_dereference *
	deref_output(ir_variable *var)
	{
	   void *mem_ctx = ralloc_parent(var);
	   ir_dereference *whole = new(mem_ctx) ir_dereference_variable(var);

	   /* Non-arrays are dereferenced directly. */
	   if (!whole->type->is_array())
	      return whole;

	   ir_constant *first = new(mem_ctx) ir_constant(0);
	   return new(mem_ctx) ir_dereference_array(whole, first);
	}

	/**
	 * Locate the parameterless signature of main() in the shader's IR.
	 *
	 * _mesa_get_main_function_signature() can't be used here because there
	 * is no symbol table at this point, so the instruction list is searched
	 * by hand.  Asserts that a signature was found.
	 */
	static ir_function_signature *
	get_main(gl_linked_shader *sh)
	{
	   /* Find the first function named "main". */
	   ir_function *main_fn = NULL;
	   foreach_in_list(ir_instruction, ir, sh->ir) {
	      ir_function *candidate = ir->as_function();
	      if (candidate && strcmp(candidate->name, "main") == 0) {
	         main_fn = candidate;
	         break;
	      }
	   }

	   /* Match it against an empty parameter list. */
	   ir_function_signature *sig = NULL;
	   if (main_fn) {
	      exec_list void_parameters;
	      sig = main_fn->matching_signature(NULL, &void_parameters, false);
	   }

	   assert(sig != NULL); /* main() must exist */
	   return sig;
	}

	bool
	lower_blend_equation_advanced(struct gl_linked_shader *sh)
	{
	if (sh->Program->sh.fs.BlendSupport == 0)
	return false;

	/* Lower early returns in main() so there's a single exit point
	* where we can insert our lowering code.
	*/
	do_lower_jumps(sh->ir, false, false, true, false, false);

	void *mem_ctx = ralloc_parent(sh->ir);

	ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
	"__blend_fb_fetch",
	ir_var_shader_out);
	fb->data.location = FRAG_RESULT_DATA0;
	fb->data.read_only = 1;
	fb->data.fb_fetch_output = 1;
	fb->data.how_declared = ir_var_hidden;

	ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
	"gl_AdvancedBlendModeMESA",
	ir_var_uniform);
	mode->data.how_declared = ir_var_hidden;
	mode->allocate_state_slots(1);
	ir_state_slot *slot0 = &mode->get_state_slots()[0];
	slot0->swizzle = SWIZZLE_XXXX;
	slot0->tokens[0] = STATE_INTERNAL;
	slot0->tokens[1] = STATE_ADVANCED_BLENDING_MODE;
	for (int i = 2; i < STATE_LENGTH; i++)
	slot0->tokens[i] = 0;

	sh->ir->push_head(fb);
	sh->ir->push_head(mode);

	/* Gather any output variables referring to render target 0.
	*
	* ARB_enhanced_layouts irritatingly allows the shader to specify
	* multiple output variables for the same render target, each of
	* which writes a subset of the components, starting at location_frac.
	* The variables can't overlap, thankfully.
	*/
	ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
	foreach_in_list(ir_instruction, ir, sh->ir) {
	ir_variable *var = ir->as_variable();
	if (!var \|\| var->data.mode != ir_var_shader_out)
	continue;

	if (var->data.location == FRAG_RESULT_DATA0 \|\|
	var->data.location == FRAG_RESULT_COLOR) {
	const int components = var->type->without_array()->vector_elements;

	for (int i = 0; i < components; i++) {
	outputs[var->data.location_frac + i] = var;
	}
	}
	}

	/* Combine values written to outputs into a single RGBA blend source.
	* We assign <0, 0, 0, 1> to any components with no corresponding output.
	*/
	ir_rvalue *blend_source;
	if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
	blend_source = deref_output(outputs[0]);
	} else {
	ir_rvalue *blend_comps[4];
	for (int i = 0; i < 4; i++) {
	ir_variable *var = outputs[i];
	if (var) {
	blend_comps[i] = swizzle(deref_output(outputs[i]),
	i - outputs[i]->data.location_frac, 1);
	} else {
	blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
	}
	}

	blend_source =
	new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
	blend_comps[0], blend_comps[1],
	blend_comps[2], blend_comps[3]);
	}

	ir_function_signature *main = get_main(sh);
	ir_factory f(&main->body, mem_ctx);

	ir_variable *result_dest =
	calc_blend_result(f, mode, fb, blend_source,
	sh->Program->sh.fs.BlendSupport);

	/* Copy the result back to the original values. It would be simpler
	* to demote the program's output variables, and create a new vec4
	* output for our result, but this pass runs before we create the
	* ARB_program_interface_query resource list. So we have to leave
	* the original outputs in place and use them.
	*/
	for (int i = 0; i < 4; i++) {
	if (!outputs[i])
	continue;

	f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
	1 << i));
	}

	validate_ir_tree(sh->ir);
	return true;
	}