blob: fafc36d1de681256dc6499ab168ea5beb2935c27 [file] [log] [blame]
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file depthstencil.h
*
* @brief Implements depth/stencil functionality
*
******************************************************************************/
#pragma once
#include "common/os.h"
#include "format_conversion.h"
//////////////////////////////////////////////////////////////////////////
/// @brief Applies a single stencil operation to the lanes selected by mask.
/// @param op - stencil operation to perform.
/// @param mask - lane select handed to _simd_blendv_ps; selected lanes take
///               the operation's result, the remaining lanes are unchanged.
/// @param stencilRefps - value written by STENCILOP_REPLACE.
/// @param stencilps - in/out stencil values, carried in a float register.
/// @note STENCILOP_KEEP and unrecognized operations leave stencilps untouched.
INLINE
void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
{
    // integer view of the incoming stencil bits for the arithmetic operations
    const simdscalari curStencil = _simd_castps_si(stencilps);

    // candidate value for the selected lanes; every case that leaves the
    // switch (instead of returning) assigns it
    simdscalar updated;

    switch (op)
    {
    case STENCILOP_ZERO:
        updated = _simd_setzero_ps();
        break;

    case STENCILOP_REPLACE:
        updated = stencilRefps;
        break;

    case STENCILOP_INCRSAT:
        // byte-wise saturating add; the 32-bit constant 1 only has its low byte set
        updated = _simd_castsi_ps(_simd_adds_epu8(curStencil, _simd_set1_epi32(1)));
        break;

    case STENCILOP_DECRSAT:
        // byte-wise saturating subtract of 1 from the low byte of each 32-bit lane
        updated = _simd_castsi_ps(_simd_subs_epu8(curStencil, _simd_set1_epi32(1)));
        break;

    case STENCILOP_INCR:
        // byte-wise wrapping add of 1 to the low byte of each 32-bit lane
        updated = _simd_castsi_ps(_simd_add_epi8(curStencil, _simd_set1_epi32(1)));
        break;

    case STENCILOP_DECR:
        // wrapping decrement expressed as a byte-wise add of 0xff (-1 as a byte)
        updated = _simd_castsi_ps(_simd_add_epi8(curStencil, _simd_set1_epi32(0xff)));
        break;

    case STENCILOP_INVERT:
    {
        // zero == zero compares true in every lane, giving an all-ones register;
        // and-not against all-ones is a bitwise NOT of the stencil bits
        const simdscalar allOnes = _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps());
        updated = _simd_andnot_ps(stencilps, allOnes);
        break;
    }

    case STENCILOP_KEEP:
    default:
        // nothing to write back
        return;
    }

    // commit the new value only to the lanes selected by the mask
    stencilps = _simd_blendv_ps(stencilps, updated, mask);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Snaps depth values to the precision of the given depth format.
/// @tparam depthFormatT - depth format; type and bit count of component 0
///                        are queried through FormatTraits.
/// @param depth - depth values to quantize.
/// @return depth unchanged for float formats; for unorm formats the value
///         scaled to the integer code range, rounded, and scaled back.
template<SWR_FORMAT depthFormatT>
simdscalar QuantizeDepth(simdscalar const &depth)
{
    const SWR_TYPE type = FormatTraits<depthFormatT>::GetType(0);
    const uint32_t bpc = FormatTraits<depthFormatT>::GetBPC(0);

    if (type != SWR_TYPE_FLOAT)
    {
        // anything that is not float is expected to be unorm
        SWR_ASSERT(type == SWR_TYPE_UNORM);

        // largest integer code representable with bpc bits
        // NOTE(review): the shift is done in int, so this presumably relies on
        // unorm depth formats having fewer than 31 bits - confirm
        const float maxCode = (float)((1 << bpc) - 1);
        const simdscalar vMaxCode = _simd_set1_ps(maxCode);

        // scale up, add one half and truncate: round-to-nearest for non-negative input
        simdscalar snapped = _simd_mul_ps(depth, vMaxCode);
        snapped = _simd_add_ps(snapped, _simd_set1_ps(0.5f));
        snapped = _simd_round_ps(snapped, _MM_FROUND_TO_ZERO);

        // formats of 16 bits or fewer multiply by the reciprocal
        if (bpc <= 16)
        {
            return _simd_mul_ps(snapped, _simd_set1_ps(1.0f / maxCode));
        }

        // wider formats use a true divide
        return _simd_div_ps(snapped, vMaxCode);
    }

    // only 32bit float depth is assumed to be supported
    SWR_ASSERT(bpc == 32);

    // same precision as the shader output, nothing to quantize
    return depth;
}
//////////////////////////////////////////////////////////////////////////
/// @brief Runs the depth and stencil tests for one SIMD group of samples.
/// @param pState - API state; supplies depthStencilState and the viewports.
/// @param frontFacing - selects front-face stencil state; back-face state is
///                      used only when double sided stencil is enabled.
/// @param viewportIndex - index into pState->vp used for the Z clamp.
/// @param iZ - interpolated depth values.
/// @param pDepthBase - depth hot tile data (R32_FLOAT), read with an aligned load.
/// @param coverageMask - lanes covered by the primitive.
/// @param pStencilBase - stencil hot tile data (R8_UINT).
/// @param pStencilMask - [out] receives the stencil test result mask.
/// @return depth result AND stencil result AND coverage.
INLINE
simdscalar DepthStencilTest(const API_STATE* pState,
    bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
    uint8_t *pStencilBase, simdscalar* pStencilMask)
{
    static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
    static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");

    const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
    const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];

    // restrict the incoming Z to the viewport range [minZ..maxZ]
    simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
    simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
    simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));

    // -1.0f has its sign bit set, so every lane starts out as "pass"
    // NOTE(review): consumers appear to look at the sign bit only - confirm
    simdscalar depthResult = _simd_set1_ps(-1.0f);

    if (pDSState->depthTestEnable)
    {
        const uint32_t depthFunc = pDSState->depthTestFunc;

        if (depthFunc == ZFUNC_NEVER)
        {
            depthResult = _simd_setzero_ps();
        }
        else if (depthFunc != ZFUNC_ALWAYS)
        {
            // the depth buffer is only read when a real comparison is needed
            const simdscalar zbuf = _simd_load_ps((const float*)pDepthBase);

            switch (depthFunc)
            {
            case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
            case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
            case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
            case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
            case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
            case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
            }
        }
    }

    // same "pass everywhere" default for the stencil test
    simdscalar stencilMask = _simd_set1_ps(-1.0f);

    if (pDSState->stencilTestEnable)
    {
        // front-face state unless this is a back face with double sided stencil on
        const bool useFrontState = frontFacing || !pDSState->doubleSidedStencilTestEnable;

        const uint8_t stencilRefValue = useFrontState ? pDSState->stencilRefValue
                                                      : pDSState->backfaceStencilRefValue;
        const uint32_t stencilTestFunc = useFrontState ? pDSState->stencilTestFunc
                                                       : pDSState->backfaceStencilTestFunc;
        const uint8_t stencilTestMask = useFrontState ? pDSState->stencilTestMask
                                                      : pDSState->backfaceStencilTestMask;

        if (stencilTestFunc == ZFUNC_NEVER)
        {
            stencilMask = _simd_setzero_ps();
        }
        else if (stencilTestFunc != ZFUNC_ALWAYS)
        {
            simdvector sbuf;
            LoadSOA<R8_UINT>(pStencilBase, sbuf);

            // apply the stencil read mask, then convert to float so that the
            // compare avoids simd integer emulation in AVX1
            const simdscalar stencilWithMask = _simd_cvtepi32_ps(
                _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));

            // the reference value goes through the same read mask
            const simdscalar stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));

            // reference is the left-hand operand of every comparison
            switch (stencilTestFunc)
            {
            case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
            }
        }
    }

    // hand the raw stencil result back to the caller
    *pStencilMask = stencilMask;

    // a lane may write depth only if it passed both tests and is covered
    const simdscalar passedBoth = _simd_and_ps(depthResult, stencilMask);
    return _simd_and_ps(passedBoth, coverageMask);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Writes depth and stencil results for one SIMD group of samples.
/// @param pViewport - viewport supplying minZ/maxZ for the Z clamp.
/// @param pDSState - depth/stencil state (write enables, ops, write masks).
/// @param frontFacing - selects front-face stencil state; back-face state is
///                      used only when double sided stencil is enabled.
/// @param iZ - interpolated depth values.
/// @param pDepthBase - destination of the masked depth store.
/// @param depthMask - lanes allowed to write depth.
/// @param coverageMask - lanes covered by the primitive.
/// @param pStencilBase - stencil data, loaded and stored as R8_UINT.
/// @param stencilMask - per-lane stencil test result.
INLINE
void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
    bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
    uint8_t *pStencilBase, const simdscalar& stencilMask)
{
    if (pDSState->depthWriteEnable)
    {
        // restrict the incoming Z to the viewport range [minZ..maxZ]
        simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
        simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
        const simdscalar clampedZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));

        // only lanes that are both allowed to write and covered get stored
        const simdscalar writeLanes = _simd_and_ps(depthMask, coverageMask);
        _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(writeLanes), clampedZ);
    }

    if (!pDSState->stencilWriteEnable)
    {
        return;
    }

    // front-face state unless this is a back face with double sided stencil on
    const bool useFrontState = frontFacing || !pDSState->doubleSidedStencilTestEnable;

    const uint8_t stencilRefValue = useFrontState ? pDSState->stencilRefValue
                                                  : pDSState->backfaceStencilRefValue;
    const uint32_t stencilFailOp = useFrontState ? pDSState->stencilFailOp
                                                 : pDSState->backfaceStencilFailOp;
    const uint32_t stencilPassDepthPassOp = useFrontState ? pDSState->stencilPassDepthPassOp
                                                          : pDSState->backfaceStencilPassDepthPassOp;
    const uint32_t stencilPassDepthFailOp = useFrontState ? pDSState->stencilPassDepthFailOp
                                                          : pDSState->backfaceStencilPassDepthFailOp;
    const uint8_t stencilWriteMask = useFrontState ? pDSState->stencilWriteMask
                                                   : pDSState->backfaceStencilWriteMask;

    // current stencil contents
    simdvector sbuf;
    LoadSOA<R8_UINT>(pStencilBase, sbuf);
    const simdscalar origStencil = sbuf.v[0];

    // reference value as raw integer bits inside a float register
    const simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));

    // lane classification for the three stencil operations
    const simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
    const simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
    const simdscalar notDepthMask = _simd_andnot_ps(depthMask, _simd_set1_ps(-1.0f));
    const simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, notDepthMask);

    // run the operations one after another on a working copy
    simdscalar stencilps = origStencil;
    StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
    StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
    StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);

    // stencil write mask: new bits where the mask is set, original bits elsewhere
    const simdscalar vWriteMask = _simd_castsi_ps(_simd_set1_epi32(stencilWriteMask));
    const simdscalar writableBits = _simd_and_ps(stencilps, vWriteMask);
    const simdscalar preservedBits = _simd_andnot_ps(vWriteMask, origStencil);
    stencilps = _simd_or_ps(preservedBits, writableBits);

    // lanes outside the coverage mask keep their original stencil value
    simdvector stencilResult;
    stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
    StoreSOA<R8_UINT>(stencilResult, pStencilBase);
}