/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file depthstencil.h
*
* @brief Implements depth/stencil functionality
*
******************************************************************************/
| #pragma once |
| #include "common/os.h" |
| #include "format_conversion.h" |
| |
| INLINE |
| void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps) |
| { |
| simdscalari stencil = _simd_castps_si(stencilps); |
| |
| switch (op) |
| { |
| case STENCILOP_KEEP: |
| break; |
| case STENCILOP_ZERO: |
| stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask); |
| break; |
| case STENCILOP_REPLACE: |
| stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask); |
| break; |
| case STENCILOP_INCRSAT: |
| { |
| simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1)); |
| stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask); |
| break; |
| } |
| case STENCILOP_DECRSAT: |
| { |
| simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1)); |
| stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask); |
| break; |
| } |
| case STENCILOP_INCR: |
| { |
| simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1)); |
| stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask); |
| break; |
| } |
| case STENCILOP_DECR: |
| { |
| simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff)); |
| stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask); |
| break; |
| } |
| case STENCILOP_INVERT: |
| { |
| simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps())); |
| stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask); |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| |
| |
| template<SWR_FORMAT depthFormatT> |
| simdscalar QuantizeDepth(simdscalar const &depth) |
| { |
| SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); |
| uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); |
| |
| if (depthType == SWR_TYPE_FLOAT) |
| { |
| // assume only 32bit float depth supported |
| SWR_ASSERT(depthBpc == 32); |
| |
| // matches shader precision, no quantizing needed |
| return depth; |
| } |
| |
| // should be unorm depth if not float |
| SWR_ASSERT(depthType == SWR_TYPE_UNORM); |
| |
| float quantize = (float)((1 << depthBpc) - 1); |
| simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize)); |
| result = _simd_add_ps(result, _simd_set1_ps(0.5f)); |
| result = _simd_round_ps(result, _MM_FROUND_TO_ZERO); |
| |
| if (depthBpc > 16) |
| { |
| result = _simd_div_ps(result, _simd_set1_ps(quantize)); |
| } |
| else |
| { |
| result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize)); |
| } |
| |
| return result; |
| } |
| |
| INLINE |
| simdscalar DepthStencilTest(const API_STATE* pState, |
| bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask, |
| uint8_t *pStencilBase, simdscalar* pStencilMask) |
| { |
| static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format"); |
| static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format"); |
| |
| const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState; |
| const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex]; |
| |
| simdscalar depthResult = _simd_set1_ps(-1.0f); |
| simdscalar zbuf; |
| |
| // clamp Z to viewport [minZ..maxZ] |
| simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); |
| simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); |
| simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); |
| |
| if (pDSState->depthTestEnable) |
| { |
| switch (pDSState->depthTestFunc) |
| { |
| case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break; |
| case ZFUNC_ALWAYS: break; |
| default: |
| zbuf = _simd_load_ps((const float*)pDepthBase); |
| } |
| |
| switch (pDSState->depthTestFunc) |
| { |
| case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break; |
| case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break; |
| case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break; |
| case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break; |
| case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break; |
| case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break; |
| } |
| } |
| |
| simdscalar stencilMask = _simd_set1_ps(-1.0f); |
| |
| if (pDSState->stencilTestEnable) |
| { |
| uint8_t stencilRefValue; |
| uint32_t stencilTestFunc; |
| uint8_t stencilTestMask; |
| if (frontFacing || !pDSState->doubleSidedStencilTestEnable) |
| { |
| stencilRefValue = pDSState->stencilRefValue; |
| stencilTestFunc = pDSState->stencilTestFunc; |
| stencilTestMask = pDSState->stencilTestMask; |
| } |
| else |
| { |
| stencilRefValue = pDSState->backfaceStencilRefValue; |
| stencilTestFunc = pDSState->backfaceStencilTestFunc; |
| stencilTestMask = pDSState->backfaceStencilTestMask; |
| } |
| |
| simdvector sbuf; |
| simdscalar stencilWithMask; |
| simdscalar stencilRef; |
| switch(stencilTestFunc) |
| { |
| case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break; |
| case ZFUNC_ALWAYS: break; |
| default: |
| LoadSOA<R8_UINT>(pStencilBase, sbuf); |
| |
| // apply stencil read mask |
| stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask))); |
| |
| // do stencil compare in float to avoid simd integer emulation in AVX1 |
| stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask)); |
| |
| stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask)); |
| break; |
| } |
| |
| switch(stencilTestFunc) |
| { |
| case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break; |
| case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break; |
| case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break; |
| case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break; |
| case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break; |
| case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break; |
| } |
| } |
| |
| simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask); |
| depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask); |
| |
| *pStencilMask = stencilMask; |
| return depthWriteMask; |
| } |
| |
| INLINE |
| void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState, |
| bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, |
| uint8_t *pStencilBase, const simdscalar& stencilMask) |
| { |
| if (pDSState->depthWriteEnable) |
| { |
| // clamp Z to viewport [minZ..maxZ] |
| simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); |
| simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); |
| simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); |
| |
| simdscalar vMask = _simd_and_ps(depthMask, coverageMask); |
| _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ); |
| } |
| |
| if (pDSState->stencilWriteEnable) |
| { |
| simdvector sbuf; |
| LoadSOA<R8_UINT>(pStencilBase, sbuf); |
| simdscalar stencilbuf = sbuf.v[0]; |
| |
| uint8_t stencilRefValue; |
| uint32_t stencilFailOp; |
| uint32_t stencilPassDepthPassOp; |
| uint32_t stencilPassDepthFailOp; |
| uint8_t stencilWriteMask; |
| if (frontFacing || !pDSState->doubleSidedStencilTestEnable) |
| { |
| stencilRefValue = pDSState->stencilRefValue; |
| stencilFailOp = pDSState->stencilFailOp; |
| stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp; |
| stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp; |
| stencilWriteMask = pDSState->stencilWriteMask; |
| } |
| else |
| { |
| stencilRefValue = pDSState->backfaceStencilRefValue; |
| stencilFailOp = pDSState->backfaceStencilFailOp; |
| stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp; |
| stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp; |
| stencilWriteMask = pDSState->backfaceStencilWriteMask; |
| } |
| |
| simdscalar stencilps = stencilbuf; |
| simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue)); |
| |
| simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask); |
| simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask); |
| simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1))); |
| |
| simdscalar origStencil = stencilps; |
| |
| StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps); |
| StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps); |
| StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps); |
| |
| // apply stencil write mask |
| simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask); |
| stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask)); |
| stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps); |
| |
| simdvector stencilResult; |
| stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask); |
| StoreSOA<R8_UINT>(stencilResult, pStencilBase); |
| } |
| |
| } |