| /**************************************************************************** |
| * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * @file state.h |
| * |
| * @brief Definitions for API state. |
| * |
| ******************************************************************************/ |
| // Skipping clang-format due to parsing by simplistic python scripts |
| // clang-format off |
| #pragma once |
| |
| #include "common/formats.h" |
| #include "common/intrin.h" |
| #include "common/rdtsc_buckets.h" |
| #include <functional> |
| #include <algorithm> |
| |
| using gfxptr_t = unsigned long long; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// PRIMITIVE_TOPOLOGY. |
| ////////////////////////////////////////////////////////////////////////// |
| enum PRIMITIVE_TOPOLOGY |
| { |
| TOP_UNKNOWN = 0x0, |
| TOP_POINT_LIST = 0x1, |
| TOP_LINE_LIST = 0x2, |
| TOP_LINE_STRIP = 0x3, |
| TOP_TRIANGLE_LIST = 0x4, |
| TOP_TRIANGLE_STRIP = 0x5, |
| TOP_TRIANGLE_FAN = 0x6, |
| TOP_QUAD_LIST = 0x7, |
| TOP_QUAD_STRIP = 0x8, |
| TOP_LINE_LIST_ADJ = 0x9, |
| TOP_LISTSTRIP_ADJ = 0xA, |
| TOP_TRI_LIST_ADJ = 0xB, |
| TOP_TRI_STRIP_ADJ = 0xC, |
| TOP_TRI_STRIP_REVERSE = 0xD, |
| TOP_POLYGON = 0xE, |
| TOP_RECT_LIST = 0xF, |
| TOP_LINE_LOOP = 0x10, |
| TOP_POINT_LIST_BF = 0x11, |
| TOP_LINE_STRIP_CONT = 0x12, |
| TOP_LINE_STRIP_BF = 0x13, |
| TOP_LINE_STRIP_CONT_BF = 0x14, |
| TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16, |
| TOP_TRIANGLE_DISC = 0x17, /// @todo What is this?? |
| |
| TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist. |
| TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches |
| TOP_PATCHLIST_2 = 0x21, |
| TOP_PATCHLIST_3 = 0x22, |
| TOP_PATCHLIST_4 = 0x23, |
| TOP_PATCHLIST_5 = 0x24, |
| TOP_PATCHLIST_6 = 0x25, |
| TOP_PATCHLIST_7 = 0x26, |
| TOP_PATCHLIST_8 = 0x27, |
| TOP_PATCHLIST_9 = 0x28, |
| TOP_PATCHLIST_10 = 0x29, |
| TOP_PATCHLIST_11 = 0x2A, |
| TOP_PATCHLIST_12 = 0x2B, |
| TOP_PATCHLIST_13 = 0x2C, |
| TOP_PATCHLIST_14 = 0x2D, |
| TOP_PATCHLIST_15 = 0x2E, |
| TOP_PATCHLIST_16 = 0x2F, |
| TOP_PATCHLIST_17 = 0x30, |
| TOP_PATCHLIST_18 = 0x31, |
| TOP_PATCHLIST_19 = 0x32, |
| TOP_PATCHLIST_20 = 0x33, |
| TOP_PATCHLIST_21 = 0x34, |
| TOP_PATCHLIST_22 = 0x35, |
| TOP_PATCHLIST_23 = 0x36, |
| TOP_PATCHLIST_24 = 0x37, |
| TOP_PATCHLIST_25 = 0x38, |
| TOP_PATCHLIST_26 = 0x39, |
| TOP_PATCHLIST_27 = 0x3A, |
| TOP_PATCHLIST_28 = 0x3B, |
| TOP_PATCHLIST_29 = 0x3C, |
| TOP_PATCHLIST_30 = 0x3D, |
| TOP_PATCHLIST_31 = 0x3E, |
| TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_SHADER_TYPE |
| ////////////////////////////////////////////////////////////////////////// |
| enum SWR_SHADER_TYPE |
| { |
| SHADER_VERTEX, |
| SHADER_GEOMETRY, |
| SHADER_DOMAIN, |
| SHADER_HULL, |
| SHADER_PIXEL, |
| SHADER_COMPUTE, |
| |
| NUM_SHADER_TYPES, |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_RENDERTARGET_ATTACHMENT |
| /// @todo Its not clear what an "attachment" means. Its not common term. |
| ////////////////////////////////////////////////////////////////////////// |
| enum SWR_RENDERTARGET_ATTACHMENT |
| { |
| SWR_ATTACHMENT_COLOR0, |
| SWR_ATTACHMENT_COLOR1, |
| SWR_ATTACHMENT_COLOR2, |
| SWR_ATTACHMENT_COLOR3, |
| SWR_ATTACHMENT_COLOR4, |
| SWR_ATTACHMENT_COLOR5, |
| SWR_ATTACHMENT_COLOR6, |
| SWR_ATTACHMENT_COLOR7, |
| SWR_ATTACHMENT_DEPTH, |
| SWR_ATTACHMENT_STENCIL, |
| |
| SWR_NUM_ATTACHMENTS |
| }; |
| |
| #define SWR_NUM_RENDERTARGETS 8 |
| |
| #define SWR_ATTACHMENT_COLOR0_BIT 0x001 |
| #define SWR_ATTACHMENT_COLOR1_BIT 0x002 |
| #define SWR_ATTACHMENT_COLOR2_BIT 0x004 |
| #define SWR_ATTACHMENT_COLOR3_BIT 0x008 |
| #define SWR_ATTACHMENT_COLOR4_BIT 0x010 |
| #define SWR_ATTACHMENT_COLOR5_BIT 0x020 |
| #define SWR_ATTACHMENT_COLOR6_BIT 0x040 |
| #define SWR_ATTACHMENT_COLOR7_BIT 0x080 |
| #define SWR_ATTACHMENT_DEPTH_BIT 0x100 |
| #define SWR_ATTACHMENT_STENCIL_BIT 0x200 |
| #define SWR_ATTACHMENT_MASK_ALL 0x3ff |
| #define SWR_ATTACHMENT_MASK_COLOR 0x0ff |
| |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief SWR Inner Tessellation factor ID |
| /// See above GetTessFactorOutputPosition code for documentation |
| enum SWR_INNER_TESSFACTOR_ID |
| { |
| SWR_QUAD_U_TRI_INSIDE, |
| SWR_QUAD_V_INSIDE, |
| |
| SWR_NUM_INNER_TESS_FACTORS, |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief SWR Outer Tessellation factor ID |
| /// See above GetTessFactorOutputPosition code for documentation |
| enum SWR_OUTER_TESSFACTOR_ID |
| { |
| SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL, |
| SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY, |
| SWR_QUAD_V_EQ0_TRI_W, |
| SWR_QUAD_V_EQ1, |
| |
| SWR_NUM_OUTER_TESS_FACTORS, |
| }; |
| |
| ///////////////////////////////////////////////////////////////////////// |
| /// simdvertex |
| /// @brief Defines a vertex element that holds all the data for SIMD vertices. |
| /// Contains space for position, SGV, and 32 generic attributes |
| ///////////////////////////////////////////////////////////////////////// |
| enum SWR_VTX_SLOTS |
| { |
| VERTEX_SGV_SLOT = 0, |
| VERTEX_SGV_RTAI_COMP = 0, |
| VERTEX_SGV_VAI_COMP = 1, |
| VERTEX_SGV_POINT_SIZE_COMP = 2, |
| VERTEX_POSITION_SLOT = 1, |
| VERTEX_POSITION_END_SLOT = 1, |
| VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist |
| VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist |
| VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT), |
| VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT), |
| SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT) |
| }; |
| |
| // SoAoSoA |
| struct simdvertex |
| { |
| simdvector attrib[SWR_VTX_NUM_SLOTS]; |
| }; |
| |
| struct simd16vertex |
| { |
| simd16vector attrib[SWR_VTX_NUM_SLOTS]; |
| }; |
| |
| template <typename SIMD_T> |
| struct SIMDVERTEX_T |
| { |
| typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS]; |
| }; |
| |
| struct SWR_WORKER_DATA |
| { |
| HANDLE hArContext; // handle to the archrast context |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_SHADER_STATS |
| /// @brief Structure passed to shader for stats collection. |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_SHADER_STATS |
| { |
| uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86. |
| uint32_t numSampleExecuted; |
| uint32_t numSampleLExecuted; |
| uint32_t numSampleBExecuted; |
| uint32_t numSampleCExecuted; |
| uint32_t numSampleCLZExecuted; |
| uint32_t numSampleCDExecuted; |
| uint32_t numGather4Executed; |
| uint32_t numGather4CExecuted; |
| uint32_t numGather4CPOExecuted; |
| uint32_t numGather4CPOCExecuted; |
| uint32_t numLodExecuted; |
| }; |
| |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_VS_CONTEXT |
| /// @brief Input to vertex shader |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_VS_CONTEXT |
| { |
| simdvertex* pVin; // IN: SIMD input vertex data store |
| simdvertex* pVout; // OUT: SIMD output vertex data store |
| |
| uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD |
| simdscalari VertexID; // IN: Vertex ID |
| simdscalari mask; // IN: Active mask for shader |
| |
| // SIMD16 Frontend fields. |
| uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in |
| // simd16vertex output |
| simd16scalari mask16; // IN: Active mask for shader (16-wide) |
| simd16scalari VertexID16; // IN: Vertex ID (16-wide) |
| |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| }; |
| |
| ///////////////////////////////////////////////////////////////////////// |
| /// ScalarCPoint |
| /// @brief defines a control point element as passed from the output |
| /// of the hull shader to the input of the domain shader |
| ///////////////////////////////////////////////////////////////////////// |
| struct ScalarAttrib |
| { |
| float x; |
| float y; |
| float z; |
| float w; |
| }; |
| |
| struct ScalarCPoint |
| { |
| ScalarAttrib attrib[SWR_VTX_NUM_SLOTS]; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_TESSELLATION_FACTORS |
| /// @brief Tessellation factors structure (non-vector) |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_TESSELLATION_FACTORS |
| { |
| float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; |
| float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; |
| float pad[2]; |
| }; |
| |
| SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32); |
| |
| #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches |
| struct ScalarPatch |
| { |
| SWR_TESSELLATION_FACTORS tessFactors; |
| ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM]; |
| ScalarCPoint patchData; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_HS_CONTEXT |
| /// @brief Input to hull shader |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_HS_CONTEXT |
| { |
| simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data |
| simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call |
| simdscalari mask; // IN: Active mask for shader |
| uint32_t outputSize; // IN: Size of HS output (per lane) |
| ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_DS_CONTEXT |
| /// @brief Input to domain shader |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_DS_CONTEXT |
| { |
| uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation |
| uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data. |
| uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component |
| uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage. |
| ScalarPatch* pCpIn; // IN: (SCALAR) Control patch |
| simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords |
| simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords |
| simdscalari mask; // IN: Active mask for shader |
| simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component) |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_GS_CONTEXT |
| /// @brief Input to geometry shader. |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_GS_CONTEXT |
| { |
| simdvector* pVerts; // IN: input primitive data for SIMD prims |
| uint32_t inputVertStride; // IN: input vertex stride, in attributes |
| simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call |
| uint32_t InstanceID; // IN: input instance ID |
| simdscalari mask; // IN: Active mask for shader |
| uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams) |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| }; |
| |
| struct PixelPositions |
| { |
| simdscalar UL; |
| simdscalar center; |
| simdscalar sample; |
| simdscalar centroid; |
| }; |
| |
| #define SWR_MAX_NUM_MULTISAMPLES 16 |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_PS_CONTEXT |
| /// @brief Input to pixel shader. |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_PS_CONTEXT |
| { |
| PixelPositions vX; // IN: x location(s) of pixels |
| PixelPositions vY; // IN: x location(s) of pixels |
| simdscalar vZ; // INOUT: z location of pixels |
| simdscalari activeMask; // OUT: mask for kill |
| simdscalar inputMask; // IN: input coverage mask for all samples |
| simdscalari oMask; // OUT: mask for output coverage |
| |
| PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid |
| PixelPositions vJ; |
| PixelPositions vOneOverW; // IN: 1/w |
| |
| const float* pAttribs; // IN: pointer to attribute barycentric coefficients |
| const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients |
| const float* pRecipW; // IN: pointer to 1/w coord for each vertex |
| const float* I; // IN: Barycentric A, B, and C coefs used to compute I |
| const float* J; // IN: Barycentric A, B, and C coefs used to compute J |
| float recipDet; // IN: 1/Det, used when barycentric interpolating attributes |
| const float* pSamplePosX; // IN: array of sample positions |
| const float* pSamplePosY; // IN: array of sample positions |
| simdvector shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget |
| |
| uint32_t frontFace; // IN: front- 1, back- 0 |
| uint32_t sampleIndex; // IN: sampleIndex |
| uint32_t renderTargetArrayIndex; // IN: render target array index from GS |
| uint32_t viewportIndex; // IN: viewport index from GS |
| uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer |
| |
| uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles |
| |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| |
| BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets. |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_CS_CONTEXT |
| /// @brief Input to compute shader. |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_CS_CONTEXT |
| { |
| // The ThreadGroupId is the current thread group index relative |
| // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup, |
| // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader. |
| |
| // Compute shader accepts the following system values. |
| // o ThreadId - Current thread id relative to all other threads in dispatch. |
| // o ThreadGroupId - Current thread group id relative to all other groups in dispatch. |
| // o ThreadIdInGroup - Current thread relative to all threads in the current thread group. |
| // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup. |
| // |
| // All of these system values can be computed in the shader. They will be |
| // derived from the current tile counter. The tile counter is an atomic counter that |
| // resides in the draw context and is initialized to the product of the dispatch dims. |
| // |
| // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z |
| // |
| // Each CPU worker thread will atomically decrement this counter and passes the current |
| // count into the shader. When the count reaches 0 then all thread groups in the |
| // dispatch call have been completed. |
| |
| uint32_t tileCounter; // The tile counter value for this thread group. |
| |
| // Dispatch dimensions used by shader to compute system values from the tile counter. |
| uint32_t dispatchDims[3]; |
| |
| uint8_t* pTGSM; // Thread Group Shared Memory pointer. |
| uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support |
| uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is |
| // responsible for subdividing scratch space per instance/simd |
| uint32_t scratchSpacePerWarp; // Scratch space per work item x SIMD_WIDTH |
| |
| SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast. |
| }; |
| |
| // enums |
| enum SWR_TILE_MODE |
| { |
| SWR_TILE_NONE = 0x0, // Linear mode (no tiling) |
| SWR_TILE_MODE_WMAJOR, // W major tiling |
| SWR_TILE_MODE_XMAJOR, // X major tiling |
| SWR_TILE_MODE_YMAJOR, // Y major tiling |
| SWR_TILE_SWRZ, // SWR-Z tiling |
| |
| |
| SWR_TILE_MODE_COUNT |
| }; |
| |
| enum SWR_SURFACE_TYPE |
| { |
| SURFACE_1D = 0, |
| SURFACE_2D = 1, |
| SURFACE_3D = 2, |
| SURFACE_CUBE = 3, |
| SURFACE_BUFFER = 4, |
| SURFACE_STRUCTURED_BUFFER = 5, |
| SURFACE_NULL = 7 |
| }; |
| |
| enum SWR_ZFUNCTION |
| { |
| ZFUNC_ALWAYS, |
| ZFUNC_NEVER, |
| ZFUNC_LT, |
| ZFUNC_EQ, |
| ZFUNC_LE, |
| ZFUNC_GT, |
| ZFUNC_NE, |
| ZFUNC_GE, |
| NUM_ZFUNC |
| }; |
| |
| enum SWR_STENCILOP |
| { |
| STENCILOP_KEEP, |
| STENCILOP_ZERO, |
| STENCILOP_REPLACE, |
| STENCILOP_INCRSAT, |
| STENCILOP_DECRSAT, |
| STENCILOP_INCR, |
| STENCILOP_DECR, |
| STENCILOP_INVERT |
| }; |
| |
| enum SWR_BLEND_FACTOR |
| { |
| BLENDFACTOR_ONE, |
| BLENDFACTOR_SRC_COLOR, |
| BLENDFACTOR_SRC_ALPHA, |
| BLENDFACTOR_DST_ALPHA, |
| BLENDFACTOR_DST_COLOR, |
| BLENDFACTOR_SRC_ALPHA_SATURATE, |
| BLENDFACTOR_CONST_COLOR, |
| BLENDFACTOR_CONST_ALPHA, |
| BLENDFACTOR_SRC1_COLOR, |
| BLENDFACTOR_SRC1_ALPHA, |
| BLENDFACTOR_ZERO, |
| BLENDFACTOR_INV_SRC_COLOR, |
| BLENDFACTOR_INV_SRC_ALPHA, |
| BLENDFACTOR_INV_DST_ALPHA, |
| BLENDFACTOR_INV_DST_COLOR, |
| BLENDFACTOR_INV_CONST_COLOR, |
| BLENDFACTOR_INV_CONST_ALPHA, |
| BLENDFACTOR_INV_SRC1_COLOR, |
| BLENDFACTOR_INV_SRC1_ALPHA |
| }; |
| |
| enum SWR_BLEND_OP |
| { |
| BLENDOP_ADD, |
| BLENDOP_SUBTRACT, |
| BLENDOP_REVSUBTRACT, |
| BLENDOP_MIN, |
| BLENDOP_MAX, |
| }; |
| |
| enum SWR_LOGIC_OP |
| { |
| LOGICOP_CLEAR, |
| LOGICOP_NOR, |
| LOGICOP_AND_INVERTED, |
| LOGICOP_COPY_INVERTED, |
| LOGICOP_AND_REVERSE, |
| LOGICOP_INVERT, |
| LOGICOP_XOR, |
| LOGICOP_NAND, |
| LOGICOP_AND, |
| LOGICOP_EQUIV, |
| LOGICOP_NOOP, |
| LOGICOP_OR_INVERTED, |
| LOGICOP_COPY, |
| LOGICOP_OR_REVERSE, |
| LOGICOP_OR, |
| LOGICOP_SET, |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_AUX_MODE |
| /// @brief Specifies how the auxiliary buffer is used by the driver. |
| ////////////////////////////////////////////////////////////////////////// |
| enum SWR_AUX_MODE |
| { |
| AUX_MODE_NONE, |
| AUX_MODE_COLOR, |
| AUX_MODE_UAV, |
| AUX_MODE_DEPTH, |
| }; |
| |
| // vertex fetch state |
| // WARNING- any changes to this struct need to be reflected |
| // in the fetch shader jit |
| struct SWR_VERTEX_BUFFER_STATE |
| { |
| gfxptr_t xpData; |
| uint32_t index; |
| uint32_t pitch; |
| uint32_t size; |
| uint32_t minVertex; // min vertex (for bounds checking) |
| uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks |
| uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for |
| // partially OOB vertices |
| }; |
| |
| struct SWR_INDEX_BUFFER_STATE |
| { |
| gfxptr_t xpIndices; |
| // Format type for indices (e.g. UINT16, UINT32, etc.) |
| SWR_FORMAT format; // @llvm_enum |
| uint32_t size; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_FETCH_CONTEXT |
| /// @brief Input to fetch shader. |
| /// @note WARNING - Changes to this struct need to be reflected in the |
| /// fetch shader jit. |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_FETCH_CONTEXT |
| { |
| const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers |
| gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws |
| gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking |
| uint32_t CurInstance; // IN: current instance |
| uint32_t BaseVertex; // IN: base vertex |
| uint32_t StartVertex; // IN: start vertex |
| uint32_t StartInstance; // IN: start instance |
| simdscalari VertexID; // OUT: vector of vertex IDs |
| simdscalari CutMask; // OUT: vector mask of indices which have the cut index value |
| #if USE_SIMD16_SHADERS |
| // simd16scalari VertexID; // OUT: vector of vertex IDs |
| // simd16scalari CutMask; // OUT: vector mask of indices which have the |
| // cut index value |
| simdscalari VertexID2; // OUT: vector of vertex IDs |
| simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value |
| #endif |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_STATS |
| /// |
| /// @brief All statistics generated by SWR go here. These are public |
| /// to driver. |
| ///////////////////////////////////////////////////////////////////////// |
| OSALIGNLINE(struct) SWR_STATS |
| { |
| // Occlusion Query |
| uint64_t DepthPassCount; // Number of passing depth tests. Not exact. |
| |
| // Pipeline Stats |
| uint64_t PsInvocations; // Number of Pixel Shader invocations |
| uint64_t CsInvocations; // Number of Compute Shader invocations |
| |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_STATS |
| /// |
| /// @brief All statistics generated by FE. |
| ///////////////////////////////////////////////////////////////////////// |
| OSALIGNLINE(struct) SWR_STATS_FE |
| { |
| uint64_t IaVertices; // Number of Fetch Shader vertices |
| uint64_t IaPrimitives; // Number of PA primitives. |
| uint64_t VsInvocations; // Number of Vertex Shader invocations |
| uint64_t HsInvocations; // Number of Hull Shader invocations |
| uint64_t DsInvocations; // Number of Domain Shader invocations |
| uint64_t GsInvocations; // Number of Geometry Shader invocations |
| uint64_t GsPrimitives; // Number of prims GS outputs. |
| uint64_t CInvocations; // Number of clipper invocations |
| uint64_t CPrimitives; // Number of clipper primitives. |
| |
| // Streamout Stats |
| uint64_t SoPrimStorageNeeded[4]; |
| uint64_t SoNumPrimsWritten[4]; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// STREAMOUT_BUFFERS |
| ///////////////////////////////////////////////////////////////////////// |
| |
| #define MAX_SO_STREAMS 4 |
| #define MAX_SO_BUFFERS 4 |
| #define MAX_ATTRIBUTES 32 |
| |
| struct SWR_STREAMOUT_BUFFER |
| { |
| // Pointers to streamout buffers. |
| gfxptr_t pBuffer; |
| |
| // Offset to the SO write offset. If not null then we update offset here. |
| gfxptr_t pWriteOffset; |
| |
| bool enable; |
| bool soWriteEnable; |
| |
| // Size of buffer in dwords. |
| uint32_t bufferSize; |
| |
| // Vertex pitch of buffer in dwords. |
| uint32_t pitch; |
| |
| // Offset into buffer in dwords. SOS will increment this offset. |
| uint32_t streamOffset; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// STREAMOUT_STATE |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_STREAMOUT_STATE |
| { |
| // This disables stream output. |
| bool soEnable; |
| |
| // which streams are enabled for streamout |
| bool streamEnable[MAX_SO_STREAMS]; |
| |
| // If set then do not send any streams to the rasterizer. |
| bool rasterizerDisable; |
| |
| // Specifies which stream to send to the rasterizer. |
| uint32_t streamToRasterizer; |
| |
| // The stream masks specify which attributes are sent to which streams. |
| // These masks help the FE to setup the pPrimData buffer that is passed |
| // the Stream Output Shader (SOS) function. |
| uint64_t streamMasks[MAX_SO_STREAMS]; |
| |
| // Number of attributes, including position, per vertex that are streamed out. |
| // This should match number of bits in stream mask. |
| uint32_t streamNumEntries[MAX_SO_STREAMS]; |
| |
| // Offset to the start of the attributes of the input vertices, in simdvector units |
| uint32_t vertexAttribOffset[MAX_SO_STREAMS]; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// STREAMOUT_CONTEXT - Passed to SOS |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_STREAMOUT_CONTEXT |
| { |
| uint32_t* pPrimData; |
| SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS]; |
| |
| // Num prims written for this stream |
| uint32_t numPrimsWritten; |
| |
| // Num prims that should have been written if there were no overflow. |
| uint32_t numPrimStorageNeeded; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_GS_STATE - Geometry shader state |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_GS_STATE |
| { |
| bool gsEnable; |
| |
| // If true, geometry shader emits a single stream, with separate cut buffer. |
| // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a |
| // separate StreamID buffer to map vertices to streams |
| bool isSingleStream; |
| |
| // Number of input attributes per vertex. Used by the frontend to |
| // optimize assembling primitives for GS |
| uint32_t numInputAttribs; |
| |
| // Stride of incoming verts in attributes |
| uint32_t inputVertStride; |
| |
| // Output topology - can be point, tristrip, linestrip, or rectlist |
| PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum |
| |
| // Maximum number of verts that can be emitted by a single instance of the GS |
| uint32_t maxNumVerts; |
| |
| // Instance count |
| uint32_t instanceCount; |
| |
| // When single stream is enabled, singleStreamID dictates which stream is being output. |
| // field ignored if isSingleStream is false |
| uint32_t singleStreamID; |
| |
| // Total amount of memory to allocate for one instance of the shader output in bytes |
| uint32_t allocationSize; |
| |
| // Offset to the start of the attributes of the input vertices, in simdvector units, as read by |
| // the GS |
| uint32_t vertexAttribOffset; |
| |
| // Offset to the attributes as stored by the preceding shader stage. |
| uint32_t srcVertexAttribOffset; |
| |
| // Size of the control data section which contains cut or streamID data, in simdscalar units. |
| // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are |
| // no cuts or streamID bits. |
| uint32_t controlDataSize; |
| |
| // Offset to the control data section, in bytes |
| uint32_t controlDataOffset; |
| |
| // Total size of an output vertex, in simdvector units |
| uint32_t outputVertexSize; |
| |
| // Offset to the start of the vertex section, in bytes |
| uint32_t outputVertexOffset; |
| |
| // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, |
| // shader is expected to store the final vertex count in the first dword of the gs output |
| // stream. |
| uint32_t staticVertexCount; |
| |
| uint32_t pad; |
| }; |
| static_assert(sizeof(SWR_GS_STATE) == 64, "Adjust padding to keep size (or remove this assert)"); |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS |
| ///////////////////////////////////////////////////////////////////////// |
| enum SWR_TS_OUTPUT_TOPOLOGY |
| { |
| SWR_TS_OUTPUT_POINT, |
| SWR_TS_OUTPUT_LINE, |
| SWR_TS_OUTPUT_TRI_CW, |
| SWR_TS_OUTPUT_TRI_CCW, |
| |
| SWR_TS_OUTPUT_TOPOLOGY_COUNT |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_TS_PARTITIONING - Defines tessellation algorithm |
| ///////////////////////////////////////////////////////////////////////// |
| enum SWR_TS_PARTITIONING |
| { |
| SWR_TS_INTEGER, |
| SWR_TS_ODD_FRACTIONAL, |
| SWR_TS_EVEN_FRACTIONAL, |
| |
| SWR_TS_PARTITIONING_COUNT |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_TS_DOMAIN - Defines Tessellation Domain |
| ///////////////////////////////////////////////////////////////////////// |
| enum SWR_TS_DOMAIN |
| { |
| SWR_TS_QUAD, |
| SWR_TS_TRI, |
| SWR_TS_ISOLINE, |
| |
| SWR_TS_DOMAIN_COUNT |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_TS_STATE - Tessellation state |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_TS_STATE |
| { |
| bool tsEnable; |
| |
| SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum |
| SWR_TS_PARTITIONING partitioning; // @llvm_enum |
| SWR_TS_DOMAIN domain; // @llvm_enum |
| |
| PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum |
| |
| uint32_t numHsInputAttribs; |
| uint32_t numHsOutputAttribs; |
| uint32_t hsAllocationSize; // Size of HS output in bytes, per lane |
| |
| uint32_t numDsOutputAttribs; |
| uint32_t dsAllocationSize; |
| uint32_t dsOutVtxAttribOffset; |
| |
| // Offset to the start of the attributes of the input vertices, in simdvector units |
| uint32_t srcVertexAttribOffset; |
| |
| // Offset to the start of the attributes expected by the hull shader |
| uint32_t vertexAttribOffset; |
| }; |
| |
| // output merger state |
| struct SWR_RENDER_TARGET_BLEND_STATE |
| { |
| uint8_t writeDisableRed : 1; |
| uint8_t writeDisableGreen : 1; |
| uint8_t writeDisableBlue : 1; |
| uint8_t writeDisableAlpha : 1; |
| }; |
| static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, |
| "Invalid SWR_RENDER_TARGET_BLEND_STATE size"); |
| |
| enum SWR_MULTISAMPLE_COUNT |
| { |
| SWR_MULTISAMPLE_1X = 0, |
| SWR_MULTISAMPLE_2X, |
| SWR_MULTISAMPLE_4X, |
| SWR_MULTISAMPLE_8X, |
| SWR_MULTISAMPLE_16X, |
| SWR_MULTISAMPLE_TYPE_COUNT |
| }; |
| |
| static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start |
| { |
| return uint32_t(1) << sampleCountEnum; |
| } // @llvm_func_end |
| |
| struct SWR_BLEND_STATE |
| { |
| // constant blend factor color in RGBA float |
| float constantColor[4]; |
| |
| // alpha test reference value in unorm8 or float32 |
| uint32_t alphaTestReference; |
| uint32_t sampleMask; |
| // all RT's have the same sample count |
| ///@todo move this to Output Merger state when we refactor |
| SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum |
| |
| SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; |
| }; |
| static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size"); |
| |
| struct SWR_BLEND_CONTEXT |
| { |
| const SWR_BLEND_STATE* pBlendState; |
| simdvector* src; |
| simdvector* src1; |
| simdvector* src0alpha; |
| uint32_t sampleNum; |
| simdvector* pDst; |
| simdvector* result; |
| simdscalari* oMask; |
| simdscalari* pMask; |
| uint32_t isAlphaTested; |
| uint32_t isAlphaBlended; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// FUNCTION POINTERS FOR SHADERS |
| |
| #if USE_SIMD16_SHADERS |
| typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out); |
| #else |
| typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); |
| #endif |
| typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext); |
| typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext); |
| typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext); |
| typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext); |
| typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext); |
| typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_STREAMOUT_CONTEXT& soContext); |
| typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext); |
| typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext); |
| typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*); |
| typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &); |
| |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// FRONTEND_STATE |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_FRONTEND_STATE |
| { |
| // skip clip test, perspective divide, and viewport transform |
| // intended for verts in screen space |
| bool vpTransformDisable; |
| bool bEnableCutIndex; |
| union |
| { |
| struct |
| { |
| uint32_t triFan : 2; |
| uint32_t lineStripList : 1; |
| uint32_t triStripList : 2; |
| }; |
| uint32_t bits; |
| } provokingVertex; |
| uint32_t topologyProvokingVertex; // provoking vertex for the draw topology |
| |
| // Size of a vertex in simdvector units. Should be sized to the |
| // maximum of the input/output of the vertex shader. |
| uint32_t vsVertexSize; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// VIEWPORT_MATRIX |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_VIEWPORT_MATRIX |
| { |
| float m00; |
| float m11; |
| float m22; |
| float m30; |
| float m31; |
| float m32; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// VIEWPORT_MATRIXES |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_VIEWPORT_MATRICES |
| { |
| float m00[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| float m11[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| float m22[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| float m30[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| float m31[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| float m32[KNOB_NUM_VIEWPORTS_SCISSORS]; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_VIEWPORT |
| ///////////////////////////////////////////////////////////////////////// |
| struct SWR_VIEWPORT |
| { |
| float x; |
| float y; |
| float width; |
| float height; |
| float minZ; |
| float maxZ; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_CULLMODE |
| ////////////////////////////////////////////////////////////////////////// |
| enum SWR_CULLMODE |
| { |
| SWR_CULLMODE_BOTH, |
| SWR_CULLMODE_NONE, |
| SWR_CULLMODE_FRONT, |
| SWR_CULLMODE_BACK |
| }; |
| |
| enum SWR_FILLMODE |
| { |
| SWR_FILLMODE_POINT, |
| SWR_FILLMODE_WIREFRAME, |
| SWR_FILLMODE_SOLID |
| }; |
| |
| enum SWR_FRONTWINDING |
| { |
| SWR_FRONTWINDING_CW, |
| SWR_FRONTWINDING_CCW |
| }; |
| |
| |
| enum SWR_PIXEL_LOCATION |
| { |
| SWR_PIXEL_LOCATION_CENTER, |
| SWR_PIXEL_LOCATION_UL, |
| }; |
| |
| // fixed point screen space sample locations within a pixel |
| struct SWR_MULTISAMPLE_POS |
| { |
| public: |
| INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func |
| INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func |
| INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func |
| INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func |
| INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func |
| INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func |
| INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func |
| INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func |
| typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef |
| INLINE sampleArrayT X() const { return _x; }; // @llvm_func |
| INLINE sampleArrayT Y() const { return _y; }; // @llvm_func |
| INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func |
| INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func |
| INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func |
| INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func |
| INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func |
| INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func |
| |
| INLINE void PrecalcSampleData(int numSamples); //@llvm_func |
| |
| private: |
| template <typename MaskT> |
| INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func |
| INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func |
| |
| // scalar sample values |
| uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES]; |
| uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES]; |
| float _x[SWR_MAX_NUM_MULTISAMPLES]; |
| float _y[SWR_MAX_NUM_MULTISAMPLES]; |
| |
| // precalc'd / vectorized samples |
| __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES]; |
| __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES]; |
| simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES]; |
| simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES]; |
| __m128i tileSampleOffsetsX; |
| __m128i tileSampleOffsetsY; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// SWR_RASTSTATE |
| ////////////////////////////////////////////////////////////////////////// |
| struct SWR_RASTSTATE |
| { |
| uint32_t cullMode : 2; |
| uint32_t fillMode : 2; |
| uint32_t frontWinding : 1; |
| uint32_t scissorEnable : 1; |
| uint32_t depthClipEnable : 1; |
| uint32_t clipEnable : 1; |
| uint32_t clipHalfZ : 1; |
| uint32_t pointParam : 1; |
| uint32_t pointSpriteEnable : 1; |
| uint32_t pointSpriteTopOrigin : 1; |
| uint32_t forcedSampleCount : 1; |
| uint32_t pixelOffset : 1; |
| uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units |
| uint32_t conservativeRast : 1; |
| |
| float pointSize; |
| float lineWidth; |
| |
| float depthBias; |
| float slopeScaledDepthBias; |
| float depthBiasClamp; |
| SWR_FORMAT depthFormat; // @llvm_enum |
| |
| // sample count the rasterizer is running at |
| SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum |
| uint32_t pixelLocation; // UL or Center |
| SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct |
| bool bIsCenterPattern; // @llvm_enum |
| }; |
| |
| |
| enum SWR_CONSTANT_SOURCE |
| { |
| SWR_CONSTANT_SOURCE_CONST_0000, |
| SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, |
| SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, |
| SWR_CONSTANT_SOURCE_PRIM_ID |
| }; |
| |
| struct SWR_ATTRIB_SWIZZLE |
| { |
| uint16_t sourceAttrib : 5; // source attribute |
| uint16_t constantSource : 2; // constant source to apply |
| uint16_t componentOverrideMask : 4; // override component with constant source |
| }; |
| |
| // backend state |
| struct SWR_BACKEND_STATE |
| { |
| uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant |
| // interpolation |
| uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be |
| // interpreted as tex coordinates |
| |
| bool swizzleEnable; // when enabled, core will parse the swizzle map when |
| // setting up attributes for the backend, otherwise |
| // all attributes up to numAttributes will be sent |
| uint8_t numAttributes; // total number of attributes to send to backend (up to 32) |
| uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some |
| // calculations for unneeded components |
| |
| bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the |
| // backend |
| bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning |
| |
| // User clip/cull distance enables |
| uint8_t cullDistanceMask; |
| uint8_t clipDistanceMask; |
| |
| // padding to ensure swizzleMap starts 64B offset from start of the struct |
| // and that the next fields are dword aligned. |
| uint8_t pad[10]; |
| |
| // Offset to the start of the attributes of the input vertices, in simdvector units |
| uint32_t vertexAttribOffset; |
| |
| // Offset to clip/cull attrib section of the vertex, in simdvector units |
| uint32_t vertexClipCullOffset; |
| |
| SWR_ATTRIB_SWIZZLE swizzleMap[32]; |
| }; |
| static_assert(sizeof(SWR_BACKEND_STATE) == 128, |
| "Adjust padding to keep size (or remove this assert)"); |
| |
| |
| union SWR_DEPTH_STENCIL_STATE |
| { |
| struct |
| { |
| // dword 0 |
| uint32_t depthWriteEnable : 1; |
| uint32_t depthTestEnable : 1; |
| uint32_t stencilWriteEnable : 1; |
| uint32_t stencilTestEnable : 1; |
| uint32_t doubleSidedStencilTestEnable : 1; |
| |
| uint32_t depthTestFunc : 3; |
| uint32_t stencilTestFunc : 3; |
| |
| uint32_t backfaceStencilPassDepthPassOp : 3; |
| uint32_t backfaceStencilPassDepthFailOp : 3; |
| uint32_t backfaceStencilFailOp : 3; |
| uint32_t backfaceStencilTestFunc : 3; |
| uint32_t stencilPassDepthPassOp : 3; |
| uint32_t stencilPassDepthFailOp : 3; |
| uint32_t stencilFailOp : 3; |
| |
| // dword 1 |
| uint8_t backfaceStencilWriteMask; |
| uint8_t backfaceStencilTestMask; |
| uint8_t stencilWriteMask; |
| uint8_t stencilTestMask; |
| |
| // dword 2 |
| uint8_t backfaceStencilRefValue; |
| uint8_t stencilRefValue; |
| }; |
| uint32_t value[3]; |
| }; |
| |
| enum SWR_SHADING_RATE |
| { |
| SWR_SHADING_RATE_PIXEL, |
| SWR_SHADING_RATE_SAMPLE, |
| SWR_SHADING_RATE_COUNT, |
| }; |
| |
| enum SWR_INPUT_COVERAGE |
| { |
| SWR_INPUT_COVERAGE_NONE, |
| SWR_INPUT_COVERAGE_NORMAL, |
| SWR_INPUT_COVERAGE_INNER_CONSERVATIVE, |
| SWR_INPUT_COVERAGE_COUNT, |
| }; |
| |
| enum SWR_PS_POSITION_OFFSET |
| { |
| SWR_PS_POSITION_SAMPLE_NONE, |
| SWR_PS_POSITION_SAMPLE_OFFSET, |
| SWR_PS_POSITION_CENTROID_OFFSET, |
| SWR_PS_POSITION_OFFSET_COUNT, |
| }; |
| |
| enum SWR_BARYCENTRICS_MASK |
| { |
| SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1, |
| SWR_BARYCENTRIC_CENTROID_MASK = 0x2, |
| SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4, |
| }; |
| |
| // pixel shader state |
| struct SWR_PS_STATE |
| { |
| // dword 0-1 |
| PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn |
| |
| // dword 2 |
| uint32_t killsPixel : 1; // pixel shader can kill pixels |
| uint32_t inputCoverage : 2; // ps uses input coverage |
| uint32_t writesODepth : 1; // pixel shader writes to depth |
| uint32_t usesSourceDepth : 1; // pixel shader reads depth |
| uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel |
| uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position |
| uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate |
| // attributes with |
| uint32_t usesUAV : 1; // pixel shader accesses UAV |
| uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test |
| |
| uint8_t renderTargetMask; // Mask of render targets written |
| }; |
| |
| // depth bounds state |
| struct SWR_DEPTH_BOUNDS_STATE |
| { |
| bool depthBoundsTestEnable; |
| float depthBoundsTestMinValue; |
| float depthBoundsTestMaxValue; |
| }; |
| // clang-format on |