| /************************************************************************** |
| * |
| * Copyright 2009-2010 VMware, Inc. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| /** |
| * @file |
| * Depth/stencil testing to LLVM IR translation. |
| * |
| * To be done accurately/efficiently the depth/stencil test must be done with |
| * the same type/format of the depth/stencil buffer, which implies massaging |
| * the incoming depths to fit into place. Using a more straightforward |
| * type/format for depth/stencil values internally and only convert when |
| * flushing would avoid this, but it would most likely result in depth fighting |
| * artifacts. |
| * |
| * We are free to use a different pixel layout though. Since our basic |
| * processing unit is a quad (2x2 pixel block) we store the depth/stencil |
| * values tiled, a quad at a time. That is, a depth buffer containing |
| * |
| * Z11 Z12 Z13 Z14 ... |
| * Z21 Z22 Z23 Z24 ... |
| * Z31 Z32 Z33 Z34 ... |
| * Z41 Z42 Z43 Z44 ... |
| * ... ... ... ... ... |
| * |
| * will actually be stored in memory as |
| * |
| * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... |
| * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... |
| * ... ... ... ... ... ... ... ... ... |
| * |
| * |
| * @author Jose Fonseca <jfonseca@vmware.com> |
| * @author Brian Paul <jfonseca@vmware.com> |
| */ |
| |
| #include "pipe/p_state.h" |
| #include "util/u_format.h" |
| #include "util/u_cpu_detect.h" |
| |
| #include "gallivm/lp_bld_type.h" |
| #include "gallivm/lp_bld_arit.h" |
| #include "gallivm/lp_bld_bitarit.h" |
| #include "gallivm/lp_bld_const.h" |
| #include "gallivm/lp_bld_conv.h" |
| #include "gallivm/lp_bld_logic.h" |
| #include "gallivm/lp_bld_flow.h" |
| #include "gallivm/lp_bld_intr.h" |
| #include "gallivm/lp_bld_debug.h" |
| #include "gallivm/lp_bld_swizzle.h" |
| |
| #include "lp_bld_depth.h" |
| |
| |
/**
 * Identifies which operator field of pipe_stencil_state to apply.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /* selects pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /* selects pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /* selects pipe_stencil_state::zpass_op */
};
| |
| |
| |
| /** |
| * Do the stencil test comparison (compare FB stencil values against ref value). |
| * This will be used twice when generating two-sided stencil code. |
| * \param stencil the front/back stencil state |
| * \param stencilRef the stencil reference value, replicated as a vector |
| * \param stencilVals vector of stencil values from framebuffer |
| * \return vector mask of pass/fail values (~0 or 0) |
| */ |
| static LLVMValueRef |
| lp_build_stencil_test_single(struct lp_build_context *bld, |
| const struct pipe_stencil_state *stencil, |
| LLVMValueRef stencilRef, |
| LLVMValueRef stencilVals) |
| { |
| LLVMBuilderRef builder = bld->gallivm->builder; |
| const unsigned stencilMax = 255; /* XXX fix */ |
| struct lp_type type = bld->type; |
| LLVMValueRef res; |
| |
| /* |
| * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values |
| * are between 0..255 so ensure we generate the fastest comparisons for |
| * wider elements. |
| */ |
| if (type.width <= 8) { |
| assert(!type.sign); |
| } else { |
| assert(type.sign); |
| } |
| |
| assert(stencil->enabled); |
| |
| if (stencil->valuemask != stencilMax) { |
| /* compute stencilRef = stencilRef & valuemask */ |
| LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); |
| stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); |
| /* compute stencilVals = stencilVals & valuemask */ |
| stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); |
| } |
| |
| res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); |
| |
| return res; |
| } |
| |
| |
| /** |
| * Do the one or two-sided stencil test comparison. |
| * \sa lp_build_stencil_test_single |
| * \param front_facing an integer vector mask, indicating front (~0) or back |
| * (0) facing polygon. If NULL, assume front-facing. |
| */ |
| static LLVMValueRef |
| lp_build_stencil_test(struct lp_build_context *bld, |
| const struct pipe_stencil_state stencil[2], |
| LLVMValueRef stencilRefs[2], |
| LLVMValueRef stencilVals, |
| LLVMValueRef front_facing) |
| { |
| LLVMValueRef res; |
| |
| assert(stencil[0].enabled); |
| |
| /* do front face test */ |
| res = lp_build_stencil_test_single(bld, &stencil[0], |
| stencilRefs[0], stencilVals); |
| |
| if (stencil[1].enabled && front_facing != NULL) { |
| /* do back face test */ |
| LLVMValueRef back_res; |
| |
| back_res = lp_build_stencil_test_single(bld, &stencil[1], |
| stencilRefs[1], stencilVals); |
| |
| res = lp_build_select(bld, front_facing, res, back_res); |
| } |
| |
| return res; |
| } |
| |
| |
| /** |
| * Apply the stencil operator (add/sub/keep/etc) to the given vector |
| * of stencil values. |
| * \return new stencil values vector |
| */ |
| static LLVMValueRef |
| lp_build_stencil_op_single(struct lp_build_context *bld, |
| const struct pipe_stencil_state *stencil, |
| enum stencil_op op, |
| LLVMValueRef stencilRef, |
| LLVMValueRef stencilVals) |
| |
| { |
| LLVMBuilderRef builder = bld->gallivm->builder; |
| struct lp_type type = bld->type; |
| LLVMValueRef res; |
| LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); |
| unsigned stencil_op; |
| |
| assert(type.sign); |
| |
| switch (op) { |
| case S_FAIL_OP: |
| stencil_op = stencil->fail_op; |
| break; |
| case Z_FAIL_OP: |
| stencil_op = stencil->zfail_op; |
| break; |
| case Z_PASS_OP: |
| stencil_op = stencil->zpass_op; |
| break; |
| default: |
| assert(0 && "Invalid stencil_op mode"); |
| stencil_op = PIPE_STENCIL_OP_KEEP; |
| } |
| |
| switch (stencil_op) { |
| case PIPE_STENCIL_OP_KEEP: |
| res = stencilVals; |
| /* we can return early for this case */ |
| return res; |
| case PIPE_STENCIL_OP_ZERO: |
| res = bld->zero; |
| break; |
| case PIPE_STENCIL_OP_REPLACE: |
| res = stencilRef; |
| break; |
| case PIPE_STENCIL_OP_INCR: |
| res = lp_build_add(bld, stencilVals, bld->one); |
| res = lp_build_min(bld, res, max); |
| break; |
| case PIPE_STENCIL_OP_DECR: |
| res = lp_build_sub(bld, stencilVals, bld->one); |
| res = lp_build_max(bld, res, bld->zero); |
| break; |
| case PIPE_STENCIL_OP_INCR_WRAP: |
| res = lp_build_add(bld, stencilVals, bld->one); |
| res = LLVMBuildAnd(builder, res, max, ""); |
| break; |
| case PIPE_STENCIL_OP_DECR_WRAP: |
| res = lp_build_sub(bld, stencilVals, bld->one); |
| res = LLVMBuildAnd(builder, res, max, ""); |
| break; |
| case PIPE_STENCIL_OP_INVERT: |
| res = LLVMBuildNot(builder, stencilVals, ""); |
| res = LLVMBuildAnd(builder, res, max, ""); |
| break; |
| default: |
| assert(0 && "bad stencil op mode"); |
| res = bld->undef; |
| } |
| |
| return res; |
| } |
| |
| |
| /** |
| * Do the one or two-sided stencil test op/update. |
| */ |
| static LLVMValueRef |
| lp_build_stencil_op(struct lp_build_context *bld, |
| const struct pipe_stencil_state stencil[2], |
| enum stencil_op op, |
| LLVMValueRef stencilRefs[2], |
| LLVMValueRef stencilVals, |
| LLVMValueRef mask, |
| LLVMValueRef front_facing) |
| |
| { |
| LLVMBuilderRef builder = bld->gallivm->builder; |
| LLVMValueRef res; |
| |
| assert(stencil[0].enabled); |
| |
| /* do front face op */ |
| res = lp_build_stencil_op_single(bld, &stencil[0], op, |
| stencilRefs[0], stencilVals); |
| |
| if (stencil[1].enabled && front_facing != NULL) { |
| /* do back face op */ |
| LLVMValueRef back_res; |
| |
| back_res = lp_build_stencil_op_single(bld, &stencil[1], op, |
| stencilRefs[1], stencilVals); |
| |
| res = lp_build_select(bld, front_facing, res, back_res); |
| } |
| |
| if (stencil[0].writemask != 0xff || |
| (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) { |
| /* mask &= stencil[0].writemask */ |
| LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, |
| stencil[0].writemask); |
| if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) { |
| LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type, |
| stencil[1].writemask); |
| writemask = lp_build_select(bld, front_facing, writemask, back_writemask); |
| } |
| |
| mask = LLVMBuildAnd(builder, mask, writemask, ""); |
| /* res = (res & mask) | (stencilVals & ~mask) */ |
| res = lp_build_select_bitwise(bld, mask, res, stencilVals); |
| } |
| else { |
| /* res = mask ? res : stencilVals */ |
| res = lp_build_select(bld, mask, res, stencilVals); |
| } |
| |
| return res; |
| } |
| |
| |
| |
| /** |
| * Return a type appropriate for depth/stencil testing. |
| */ |
| struct lp_type |
| lp_depth_type(const struct util_format_description *format_desc, |
| unsigned length) |
| { |
| struct lp_type type; |
| unsigned swizzle; |
| |
| assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
| assert(format_desc->block.width == 1); |
| assert(format_desc->block.height == 1); |
| |
| swizzle = format_desc->swizzle[0]; |
| assert(swizzle < 4); |
| |
| memset(&type, 0, sizeof type); |
| type.width = format_desc->block.bits; |
| |
| if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { |
| type.floating = TRUE; |
| assert(swizzle == 0); |
| assert(format_desc->channel[swizzle].size == format_desc->block.bits); |
| } |
| else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { |
| assert(format_desc->block.bits <= 32); |
| assert(format_desc->channel[swizzle].normalized); |
| if (format_desc->channel[swizzle].size < format_desc->block.bits) { |
| /* Prefer signed integers when possible, as SSE has less support |
| * for unsigned comparison; |
| */ |
| type.sign = TRUE; |
| } |
| } |
| else |
| assert(0); |
| |
| assert(type.width <= length); |
| type.length = length / type.width; |
| |
| return type; |
| } |
| |
| |
| /** |
| * Compute bitmask and bit shift to apply to the incoming fragment Z values |
| * and the Z buffer values needed before doing the Z comparison. |
| * |
| * Note that we leave the Z bits in the position that we find them |
| * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us |
| * get by with fewer bit twiddling steps. |
| */ |
| static boolean |
| get_z_shift_and_mask(const struct util_format_description *format_desc, |
| unsigned *shift, unsigned *width, unsigned *mask) |
| { |
| const unsigned total_bits = format_desc->block.bits; |
| unsigned z_swizzle; |
| unsigned chan; |
| unsigned padding_left, padding_right; |
| |
| assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
| assert(format_desc->block.width == 1); |
| assert(format_desc->block.height == 1); |
| |
| z_swizzle = format_desc->swizzle[0]; |
| |
| if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
| return FALSE; |
| |
| *width = format_desc->channel[z_swizzle].size; |
| |
| padding_right = 0; |
| for (chan = 0; chan < z_swizzle; ++chan) |
| padding_right += format_desc->channel[chan].size; |
| |
| padding_left = |
| total_bits - (padding_right + *width); |
| |
| if (padding_left || padding_right) { |
| unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; |
| unsigned long long mask_right = (1ULL << (padding_right)) - 1; |
| *mask = mask_left ^ mask_right; |
| } |
| else { |
| *mask = 0xffffffff; |
| } |
| |
| *shift = padding_right; |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Compute bitmask and bit shift to apply to the framebuffer pixel values |
| * to put the stencil bits in the least significant position. |
| * (i.e. 0x000000ff) |
| */ |
| static boolean |
| get_s_shift_and_mask(const struct util_format_description *format_desc, |
| unsigned *shift, unsigned *mask) |
| { |
| unsigned s_swizzle; |
| unsigned chan, sz; |
| |
| s_swizzle = format_desc->swizzle[1]; |
| |
| if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
| return FALSE; |
| |
| *shift = 0; |
| for (chan = 0; chan < s_swizzle; chan++) |
| *shift += format_desc->channel[chan].size; |
| |
| sz = format_desc->channel[s_swizzle].size; |
| *mask = (1U << sz) - 1U; |
| |
| return TRUE; |
| } |
| |
| |
| /** |
| * Perform the occlusion test and increase the counter. |
| * Test the depth mask. Add the number of channel which has none zero mask |
| * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. |
| * The counter will add 4. |
| * |
| * \param type holds element type of the mask vector. |
| * \param maskvalue is the depth test mask. |
| * \param counter is a pointer of the uint32 counter. |
| */ |
| void |
| lp_build_occlusion_count(struct gallivm_state *gallivm, |
| struct lp_type type, |
| LLVMValueRef maskvalue, |
| LLVMValueRef counter) |
| { |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMContextRef context = gallivm->context; |
| LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); |
| LLVMValueRef count, newcount; |
| |
| assert(type.length <= 16); |
| assert(type.floating); |
| |
| if(util_cpu_caps.has_sse && type.length == 4) { |
| const char *movmskintr = "llvm.x86.sse.movmsk.ps"; |
| const char *popcntintr = "llvm.ctpop.i32"; |
| LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, |
| lp_build_vec_type(gallivm, type), ""); |
| bits = lp_build_intrinsic_unary(builder, movmskintr, |
| LLVMInt32TypeInContext(context), bits); |
| count = lp_build_intrinsic_unary(builder, popcntintr, |
| LLVMInt32TypeInContext(context), bits); |
| } |
| else if(util_cpu_caps.has_avx && type.length == 8) { |
| const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; |
| const char *popcntintr = "llvm.ctpop.i32"; |
| LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, |
| lp_build_vec_type(gallivm, type), ""); |
| bits = lp_build_intrinsic_unary(builder, movmskintr, |
| LLVMInt32TypeInContext(context), bits); |
| count = lp_build_intrinsic_unary(builder, popcntintr, |
| LLVMInt32TypeInContext(context), bits); |
| } |
| else { |
| unsigned i; |
| LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); |
| LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); |
| LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); |
| LLVMValueRef shufflev, countd; |
| LLVMValueRef shuffles[16]; |
| const char *popcntintr = NULL; |
| |
| countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); |
| |
| for (i = 0; i < type.length; i++) { |
| shuffles[i] = lp_build_const_int32(gallivm, 4*i); |
| } |
| |
| shufflev = LLVMConstVector(shuffles, type.length); |
| countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); |
| countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); |
| |
| /* |
| * XXX FIXME |
| * this is bad on cpus without popcount (on x86 supported by intel |
| * nehalem, amd barcelona, and up - not tied to sse42). |
| * Would be much faster to just sum the 4 elements of the vector with |
| * some horizontal add (shuffle/add/shuffle/add after the initial and). |
| */ |
| switch (type.length) { |
| case 4: |
| popcntintr = "llvm.ctpop.i32"; |
| break; |
| case 8: |
| popcntintr = "llvm.ctpop.i64"; |
| break; |
| case 16: |
| popcntintr = "llvm.ctpop.i128"; |
| break; |
| default: |
| assert(0); |
| } |
| count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); |
| |
| if (type.length > 4) { |
| count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), ""); |
| } |
| } |
| newcount = LLVMBuildLoad(builder, counter, "origcount"); |
| newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); |
| LLVMBuildStore(builder, newcount, counter); |
| } |
| |
| |
| |
| /** |
| * Generate code for performing depth and/or stencil tests. |
| * We operate on a vector of values (typically n 2x2 quads). |
| * |
| * \param depth the depth test state |
| * \param stencil the front/back stencil state |
| * \param type the data type of the fragment depth/stencil values |
| * \param format_desc description of the depth/stencil surface |
| * \param mask the alive/dead pixel mask for the quad (vector) |
| * \param stencil_refs the front/back stencil ref values (scalar) |
| * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) |
| * \param zs_dst_ptr pointer to depth/stencil values in framebuffer |
| * \param face contains boolean value indicating front/back facing polygon |
| */ |
| void |
| lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
| const struct pipe_depth_state *depth, |
| const struct pipe_stencil_state stencil[2], |
| struct lp_type z_src_type, |
| const struct util_format_description *format_desc, |
| struct lp_build_mask_context *mask, |
| LLVMValueRef stencil_refs[2], |
| LLVMValueRef z_src, |
| LLVMValueRef zs_dst_ptr, |
| LLVMValueRef face, |
| LLVMValueRef *zs_value, |
| boolean do_branch) |
| { |
| LLVMBuilderRef builder = gallivm->builder; |
| struct lp_type z_type; |
| struct lp_build_context z_bld; |
| struct lp_build_context s_bld; |
| struct lp_type s_type; |
| unsigned z_shift = 0, z_width = 0, z_mask = 0; |
| LLVMValueRef zs_dst, z_dst = NULL; |
| LLVMValueRef stencil_vals = NULL; |
| LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; |
| LLVMValueRef z_pass = NULL, s_pass_mask = NULL; |
| LLVMValueRef orig_mask = lp_build_mask_value(mask); |
| LLVMValueRef front_facing = NULL; |
| |
| |
| /* |
| * Depths are expected to be between 0 and 1, even if they are stored in |
| * floats. Setting these bits here will ensure that the lp_build_conv() call |
| * below won't try to unnecessarily clamp the incoming values. |
| */ |
| if(z_src_type.floating) { |
| z_src_type.sign = FALSE; |
| z_src_type.norm = TRUE; |
| } |
| else { |
| assert(!z_src_type.sign); |
| assert(z_src_type.norm); |
| } |
| |
| /* Pick the depth type. */ |
| z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); |
| |
| /* FIXME: Cope with a depth test type with a different bit width. */ |
| assert(z_type.width == z_src_type.width); |
| assert(z_type.length == z_src_type.length); |
| |
| /* FIXME: for non-float depth/stencil might generate better code |
| * if we'd always split it up to use 128bit operations. |
| * For stencil we'd almost certainly want to pack to 8xi16 values, |
| * for z just run twice. |
| */ |
| |
| /* Sanity checking */ |
| { |
| const unsigned z_swizzle = format_desc->swizzle[0]; |
| const unsigned s_swizzle = format_desc->swizzle[1]; |
| |
| assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || |
| s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); |
| |
| assert(depth->enabled || stencil[0].enabled); |
| |
| assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
| assert(format_desc->block.width == 1); |
| assert(format_desc->block.height == 1); |
| |
| if (stencil[0].enabled) { |
| assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT || |
| format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM); |
| } |
| |
| assert(z_swizzle < 4); |
| assert(format_desc->block.bits == z_type.width); |
| if (z_type.floating) { |
| assert(z_swizzle == 0); |
| assert(format_desc->channel[z_swizzle].type == |
| UTIL_FORMAT_TYPE_FLOAT); |
| assert(format_desc->channel[z_swizzle].size == |
| format_desc->block.bits); |
| } |
| else { |
| assert(format_desc->channel[z_swizzle].type == |
| UTIL_FORMAT_TYPE_UNSIGNED); |
| assert(format_desc->channel[z_swizzle].normalized); |
| assert(!z_type.fixed); |
| } |
| } |
| |
| |
| /* Setup build context for Z vals */ |
| lp_build_context_init(&z_bld, gallivm, z_type); |
| |
| /* Setup build context for stencil vals */ |
| s_type = lp_int_type(z_type); |
| lp_build_context_init(&s_bld, gallivm, s_type); |
| |
| /* Load current z/stencil value from z/stencil buffer */ |
| zs_dst_ptr = LLVMBuildBitCast(builder, |
| zs_dst_ptr, |
| LLVMPointerType(z_bld.vec_type, 0), ""); |
| zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); |
| |
| lp_build_name(zs_dst, "zs_dst"); |
| |
| |
| /* Compute and apply the Z/stencil bitmasks and shifts. |
| */ |
| { |
| unsigned s_shift, s_mask; |
| |
| if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { |
| if (z_mask != 0xffffffff) { |
| z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); |
| } |
| |
| /* |
| * Align the framebuffer Z 's LSB to the right. |
| */ |
| if (z_shift) { |
| LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
| z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); |
| } else if (z_bitmask) { |
| /* TODO: Instead of loading a mask from memory and ANDing, it's |
| * probably faster to just shake the bits with two shifts. */ |
| z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); |
| } else { |
| z_dst = zs_dst; |
| lp_build_name(z_dst, "z_dst"); |
| } |
| } |
| |
| if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { |
| if (s_shift) { |
| LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); |
| stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); |
| stencil_shift = shift; /* used below */ |
| } |
| else { |
| stencil_vals = zs_dst; |
| } |
| |
| if (s_mask != 0xffffffff) { |
| LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); |
| stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); |
| } |
| |
| lp_build_name(stencil_vals, "s_dst"); |
| } |
| } |
| |
| if (stencil[0].enabled) { |
| |
| if (face) { |
| LLVMValueRef zero = lp_build_const_int32(gallivm, 0); |
| |
| /* front_facing = face != 0 ? ~0 : 0 */ |
| front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); |
| front_facing = LLVMBuildSExt(builder, front_facing, |
| LLVMIntTypeInContext(gallivm->context, |
| s_bld.type.length*s_bld.type.width), |
| ""); |
| front_facing = LLVMBuildBitCast(builder, front_facing, |
| s_bld.int_vec_type, ""); |
| } |
| |
| /* convert scalar stencil refs into vectors */ |
| stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); |
| stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); |
| |
| s_pass_mask = lp_build_stencil_test(&s_bld, stencil, |
| stencil_refs, stencil_vals, |
| front_facing); |
| |
| /* apply stencil-fail operator */ |
| { |
| LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); |
| stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, |
| stencil_refs, stencil_vals, |
| s_fail_mask, front_facing); |
| } |
| } |
| |
| if (depth->enabled) { |
| /* |
| * Convert fragment Z to the desired type, aligning the LSB to the right. |
| */ |
| |
| assert(z_type.width == z_src_type.width); |
| assert(z_type.length == z_src_type.length); |
| assert(lp_check_value(z_src_type, z_src)); |
| if (z_src_type.floating) { |
| /* |
| * Convert from floating point values |
| */ |
| |
| if (!z_type.floating) { |
| z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, |
| z_src_type, |
| z_width, |
| z_src); |
| } |
| } else { |
| /* |
| * Convert from unsigned normalized values. |
| */ |
| |
| assert(!z_src_type.sign); |
| assert(!z_src_type.fixed); |
| assert(z_src_type.norm); |
| assert(!z_type.floating); |
| if (z_src_type.width > z_width) { |
| LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, |
| z_src_type.width - z_width); |
| z_src = LLVMBuildLShr(builder, z_src, shift, ""); |
| } |
| } |
| assert(lp_check_value(z_type, z_src)); |
| |
| lp_build_name(z_src, "z_src"); |
| |
| /* compare src Z to dst Z, returning 'pass' mask */ |
| z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); |
| |
| if (!stencil[0].enabled) { |
| /* We can potentially skip all remaining operations here, but only |
| * if stencil is disabled because we still need to update the stencil |
| * buffer values. Don't need to update Z buffer values. |
| */ |
| lp_build_mask_update(mask, z_pass); |
| |
| if (do_branch) { |
| lp_build_mask_check(mask); |
| do_branch = FALSE; |
| } |
| } |
| |
| if (depth->writemask) { |
| LLVMValueRef zselectmask; |
| |
| /* mask off bits that failed Z test */ |
| zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); |
| |
| /* mask off bits that failed stencil test */ |
| if (s_pass_mask) { |
| zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); |
| } |
| |
| /* Mix the old and new Z buffer values. |
| * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] |
| */ |
| z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); |
| } |
| |
| if (stencil[0].enabled) { |
| /* update stencil buffer values according to z pass/fail result */ |
| LLVMValueRef z_fail_mask, z_pass_mask; |
| |
| /* apply Z-fail operator */ |
| z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); |
| stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, |
| stencil_refs, stencil_vals, |
| z_fail_mask, front_facing); |
| |
| /* apply Z-pass operator */ |
| z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); |
| stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, |
| stencil_refs, stencil_vals, |
| z_pass_mask, front_facing); |
| } |
| } |
| else { |
| /* No depth test: apply Z-pass operator to stencil buffer values which |
| * passed the stencil test. |
| */ |
| s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, ""); |
| stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, |
| stencil_refs, stencil_vals, |
| s_pass_mask, front_facing); |
| } |
| |
| /* Put Z and ztencil bits in the right place */ |
| if (z_dst && z_shift) { |
| LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
| z_dst = LLVMBuildShl(builder, z_dst, shift, ""); |
| } |
| if (stencil_vals && stencil_shift) |
| stencil_vals = LLVMBuildShl(builder, stencil_vals, |
| stencil_shift, ""); |
| |
| /* Finally, merge/store the z/stencil values */ |
| if ((depth->enabled && depth->writemask) || |
| (stencil[0].enabled && stencil[0].writemask)) { |
| |
| if (z_dst && stencil_vals) |
| zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); |
| else if (z_dst) |
| zs_dst = z_dst; |
| else |
| zs_dst = stencil_vals; |
| |
| *zs_value = zs_dst; |
| } |
| |
| if (s_pass_mask) |
| lp_build_mask_update(mask, s_pass_mask); |
| |
| if (depth->enabled && stencil[0].enabled) |
| lp_build_mask_update(mask, z_pass); |
| |
| if (do_branch) |
| lp_build_mask_check(mask); |
| |
| } |
| |
| |
| void |
| lp_build_depth_write(LLVMBuilderRef builder, |
| const struct util_format_description *format_desc, |
| LLVMValueRef zs_dst_ptr, |
| LLVMValueRef zs_value) |
| { |
| zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, |
| LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); |
| |
| LLVMBuildStore(builder, zs_value, zs_dst_ptr); |
| } |
| |
| |
| void |
| lp_build_deferred_depth_write(struct gallivm_state *gallivm, |
| struct lp_type z_src_type, |
| const struct util_format_description *format_desc, |
| struct lp_build_mask_context *mask, |
| LLVMValueRef zs_dst_ptr, |
| LLVMValueRef zs_value) |
| { |
| struct lp_type z_type; |
| struct lp_build_context z_bld; |
| LLVMValueRef z_dst; |
| LLVMBuilderRef builder = gallivm->builder; |
| |
| /* XXX: pointlessly redo type logic: |
| */ |
| z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); |
| lp_build_context_init(&z_bld, gallivm, z_type); |
| |
| zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, |
| LLVMPointerType(z_bld.vec_type, 0), ""); |
| |
| z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); |
| z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); |
| |
| LLVMBuildStore(builder, z_dst, zs_dst_ptr); |
| } |