/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_genX_state_brw.h"
#include "common/intel_guardband.h"
#include "common/intel_tiled_render.h"
#include "compiler/brw_prim.h"
#include "genX_mi_builder.h"
static const uint32_t vk_to_intel_blend[] = {
[VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
[VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
[VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
[VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
[VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
[VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
[VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
[VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
[VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
[VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};
static const uint32_t vk_to_intel_blend_op[] = {
[VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
[VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
[VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
[VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
[VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};
static const uint32_t vk_to_intel_cullmode[] = {
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};
static const uint32_t vk_to_intel_fillmode[] = {
[VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
[VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
[VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};
static const uint32_t vk_to_intel_front_face[] = {
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
[VK_FRONT_FACE_CLOCKWISE] = 0
};
static const uint32_t vk_to_intel_logic_op[] = {
[VK_LOGIC_OP_COPY] = LOGICOP_COPY,
[VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
[VK_LOGIC_OP_AND] = LOGICOP_AND,
[VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
[VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
[VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
[VK_LOGIC_OP_XOR] = LOGICOP_XOR,
[VK_LOGIC_OP_OR] = LOGICOP_OR,
[VK_LOGIC_OP_NOR] = LOGICOP_NOR,
[VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
[VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
[VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
[VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
[VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
[VK_LOGIC_OP_NAND] = LOGICOP_NAND,
[VK_LOGIC_OP_SET] = LOGICOP_SET,
};
static const uint32_t vk_to_intel_compare_op[] = {
[VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER,
[VK_COMPARE_OP_LESS] = PREFILTEROP_LESS,
[VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL,
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL,
[VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER,
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL,
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
[VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS,
};
static const uint32_t vk_to_intel_stencil_op[] = {
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
[VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
[VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
[VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};
static const uint32_t vk_to_intel_primitive_type[] = {
[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
static uint32_t vk_to_intel_index_type(VkIndexType type)
{
switch (type) {
case VK_INDEX_TYPE_UINT8_KHR:
return INDEX_BYTE;
case VK_INDEX_TYPE_UINT16:
return INDEX_WORD;
case VK_INDEX_TYPE_UINT32:
return INDEX_DWORD;
default:
unreachable("invalid index type");
}
}
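/* Wa_16014912113: re-emit the given URB configuration (presumably the one
 * being replaced) with its original start addresses but a fixed number of
 * entries (256 for the VS, 0 for the other stages), followed by an HDC
 * pipeline flush, before the caller programs the new allocation.
 */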
void
genX(batch_emit_wa_16014912113)(struct anv_batch *batch,
const struct intel_urb_config *urb_cfg)
{
#if INTEL_NEEDS_WA_16014912113
if (urb_cfg->size[0] == 0)
return;
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg->start[i];
urb.VSURBStartingAddressSliceN = urb_cfg->start[i];
urb.VSNumberofURBEntriesSlice0 = i == 0 ? 256 : 0;
urb.VSNumberofURBEntriesSliceN = i == 0 ? 256 : 0;
}
#else
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg->start[i];
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
}
#endif
}
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
pc.HDCPipelineFlushEnable = true;
}
#endif
}
static void
genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
{
#if INTEL_WA_16013994831_GFX_VER
/* Wa_16013994831 - Disable preemption during streamout and enable it
* again if XFB is not used by the current pipeline.
*/
if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
return;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
if (pipeline->uses_xfb) {
genX(cmd_buffer_set_preemption)(cmd_buffer, false);
return;
}
if (!cmd_buffer->state.gfx.object_preemption)
genX(cmd_buffer_set_preemption)(cmd_buffer, true);
#endif
}
#if GFX_VER >= 12 && GFX_VER < 30
static uint32_t
get_cps_state_offset(const struct anv_device *device,
const struct vk_fragment_shading_rate_state *fsr)
{
uint32_t offset;
static const uint32_t size_index[] = {
[1] = 0,
[2] = 1,
[4] = 2,
};
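/* Worked example for the Gfx12.5+ layout below (illustrative):
 * combiner_ops = {KEEP, REPLACE} (0 and 1) with a 4x2 fragment size gives
 *
 *    1 + 0 * 45 + 1 * 9 + size_index[4] * 3 + size_index[2]
 *      = 1 + 9 + 6 + 1 = 17
 *
 * entries into the table, each entry covering MAX_VIEWPORTS CPS_STATE
 * structures of GENX(CPS_STATE_length) DWORDs.
 */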
#if GFX_VERx10 >= 125
offset =
1 + /* skip disabled */
fsr->combiner_ops[0] * 5 * 3 * 3 +
fsr->combiner_ops[1] * 3 * 3 +
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#else
offset =
1 + /* skip disabled */
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#endif
offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 && GFX_VER < 30 */
#if GFX_VER >= 30
static uint32_t
get_cps_size(uint32_t size)
{
switch (size) {
case 1:
return CPSIZE_1;
case 2:
return CPSIZE_2;
case 4:
return CPSIZE_4;
default:
unreachable("Invalid size");
}
}
static const uint32_t vk_to_intel_shading_rate_combiner_op[] = {
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = CPS_COMB_OP_PASSTHROUGH,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = CPS_COMB_OP_OVERRIDE,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = CPS_COMB_OP_HIGH_QUALITY,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = CPS_COMB_OP_LOW_QUALITY,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = CPS_COMB_OP_RELATIVE,
};
#endif
static bool
has_ds_feedback_loop(const struct anv_pipeline_bind_map *bind_map,
const struct vk_dynamic_graphics_state *dyn)
{
if (BITSET_IS_EMPTY(bind_map->input_attachments))
return false;
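/* MESA_VK_ATTACHMENT_NO_INDEX is remapped to the last slot of the
 * input_attachments bitset, which is presumably where the bind map tracks
 * input attachments declared without an explicit index.
 */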
const unsigned depth_att = dyn->ial.depth_att == MESA_VK_ATTACHMENT_NO_INDEX ?
MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.depth_att;
const unsigned stencil_att = dyn->ial.stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ?
MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.stencil_att;
return
(dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT)) != 0 ||
(dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED &&
BITSET_TEST(bind_map->input_attachments, depth_att)) ||
(dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED &&
BITSET_TEST(bind_map->input_attachments, stencil_att));
}
UNUSED static bool
want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct vk_depth_stencil_state *ds)
{
if (GFX_VER > 9)
return false;
assert(GFX_VER == 9);
/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
*
* Clearing this bit will force the STC cache to wait for pending
* retirement of pixels at the HZ-read stage and do the STC-test for
* Non-promoted, R-computed and Computed depth modes instead of
* postponing the STC-test to RCPFE.
*
* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*
* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*
* COMP_STC_EN = STC_TEST_EN &&
* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
*
* SW parses the pipeline states to generate the following logical
* signal indicating if PMA FIX can be enabled.
*
* STC_PMA_OPT =
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
* !(3DSTATE_WM::EDSC_Mode == 2) &&
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (COMP_STC_EN || STC_WRITE_EN) &&
* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!gfx->hiz_enabled)
return false;
/* We can't possibly know if HiZ is enabled without the depth attachment */
ASSERTED const struct anv_image_view *d_iview = gfx->depth_att.iview;
assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == 2) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*/
const bool stc_test_en = ds->stencil.test_enable;
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*/
const bool stc_write_en = ds->stencil.write_enable;
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
/* COMP_STC_EN || STC_WRITE_EN */
if (!(comp_stc_en || stc_write_en))
return false;
/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
return pipeline->kill_pixel ||
has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
static inline bool
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
VkLineRasterizationModeKHR line_mode)
{
if (raster_mode == VK_POLYGON_MODE_LINE &&
line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
return true;
return false;
}
static inline VkLineRasterizationModeKHR
anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
unsigned rasterization_samples)
{
if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
if (rasterization_samples > 1) {
return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
} else {
return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
}
}
return line_mode;
}
/** Returns the final polygon mode for rasterization
*
* This function takes into account polygon mode, primitive topology and the
* different shader stages which might generate their own type of primitives.
*/
static inline VkPolygonMode
anv_raster_polygon_mode(const struct anv_graphics_pipeline *pipeline,
VkPolygonMode polygon_mode,
VkPrimitiveTopology primitive_topology)
{
if (anv_pipeline_is_mesh(pipeline)) {
switch (get_mesh_prog_data(pipeline)->primitive_type) {
case MESA_PRIM_POINTS:
return VK_POLYGON_MODE_POINT;
case MESA_PRIM_LINES:
return VK_POLYGON_MODE_LINE;
case MESA_PRIM_TRIANGLES:
return polygon_mode;
default:
unreachable("invalid primitive type for mesh");
}
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
switch (get_gs_prog_data(pipeline)->output_topology) {
case _3DPRIM_POINTLIST:
return VK_POLYGON_MODE_POINT;
case _3DPRIM_LINELIST:
case _3DPRIM_LINESTRIP:
case _3DPRIM_LINELOOP:
return VK_POLYGON_MODE_LINE;
case _3DPRIM_TRILIST:
case _3DPRIM_TRIFAN:
case _3DPRIM_TRISTRIP:
case _3DPRIM_RECTLIST:
case _3DPRIM_QUADLIST:
case _3DPRIM_QUADSTRIP:
case _3DPRIM_POLYGON:
return polygon_mode;
}
unreachable("Unsupported GS output topology");
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
switch (get_tes_prog_data(pipeline)->output_topology) {
case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
return VK_POLYGON_MODE_POINT;
case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
return VK_POLYGON_MODE_LINE;
case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
return polygon_mode;
}
unreachable("Unsupported TCS output topology");
} else {
switch (primitive_topology) {
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
return VK_POLYGON_MODE_POINT;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
return VK_POLYGON_MODE_LINE;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
return polygon_mode;
default:
unreachable("Unsupported primitive topology");
}
}
}
static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)
{
return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}
static inline bool
anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
{
return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
}
static void
anv_rasterization_mode(VkPolygonMode raster_mode,
VkLineRasterizationModeKHR line_mode,
float line_width,
uint32_t *api_mode,
bool *msaa_rasterization_enable)
{
if (raster_mode == VK_POLYGON_MODE_LINE) {
/* Unfortunately, configuring our line rasterization hardware on gfx8
* and later is rather painful. Instead of giving us bits to tell the
* hardware what line mode to use like we had on gfx7, we now have an
* arcane combination of API Mode and MSAA enable bits which do things
* in a table which are expected to magically put the hardware into the
* right mode for your API. Sadly, Vulkan isn't any of the APIs the
* hardware people thought of so nothing works the way you want it to.
*
* Look at the table titled "Multisample Rasterization Modes" in Vol 7
* of the Skylake PRM for more details.
*/
switch (line_mode) {
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
*api_mode = DX101;
#if GFX_VER <= 9
/* Prior to ICL, the algorithm the HW uses to draw wide lines
* doesn't quite match what the CTS expects, at least for rectangular
* lines, so we set this to false for wide lines here, making the HW
* draw parallelograms instead, which work well enough.
*/
*msaa_rasterization_enable = line_width < 1.0078125;
#else
*msaa_rasterization_enable = true;
#endif
break;
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
*api_mode = DX9OGL;
*msaa_rasterization_enable = false;
break;
default:
unreachable("Unsupported line rasterization mode");
}
} else {
*api_mode = DX101;
*msaa_rasterization_enable = true;
}
}
static bool
is_src1_blend_factor(enum GENX(3D_Color_Buffer_Blend_Factor) factor)
{
return factor == BLENDFACTOR_SRC1_COLOR ||
factor == BLENDFACTOR_SRC1_ALPHA ||
factor == BLENDFACTOR_INV_SRC1_COLOR ||
factor == BLENDFACTOR_INV_SRC1_ALPHA;
}
#if GFX_VERx10 == 125
/**
* Return the dimensions of the current rendering area, defined as the
* bounding box of all present color, depth and stencil attachments.
*/
UNUSED static bool
calculate_render_area(const struct anv_cmd_graphics_state *gfx,
unsigned *width, unsigned *height)
{
*width = gfx->render_area.offset.x + gfx->render_area.extent.width;
*height = gfx->render_area.offset.y + gfx->render_area.extent.height;
for (unsigned i = 0; i < gfx->color_att_count; i++) {
const struct anv_attachment *att = &gfx->color_att[i];
if (att->iview) {
*width = MAX2(*width, att->iview->vk.extent.width);
*height = MAX2(*height, att->iview->vk.extent.height);
}
}
const struct anv_image_view *const z_view = gfx->depth_att.iview;
if (z_view) {
*width = MAX2(*width, z_view->vk.extent.width);
*height = MAX2(*height, z_view->vk.extent.height);
}
const struct anv_image_view *const s_view = gfx->stencil_att.iview;
if (s_view) {
*width = MAX2(*width, s_view->vk.extent.width);
*height = MAX2(*height, s_view->vk.extent.height);
}
return *width && *height;
}
/* Calculate TBIMR tiling parameters adequate for the current pipeline
* setup. Return true if TBIMR should be enabled.
*/
UNUSED static bool
calculate_tile_dimensions(const struct anv_device *device,
const struct anv_cmd_graphics_state *gfx,
const struct intel_l3_config *l3_config,
unsigned fb_width, unsigned fb_height,
unsigned *tile_width, unsigned *tile_height)
{
assert(GFX_VER == 12);
const unsigned aux_scale = ISL_MAIN_TO_CCS_SIZE_RATIO_XE;
unsigned pixel_size = 0;
/* Perform a rough calculation of the tile cache footprint of the
* pixel pipeline, approximating it as the sum of the amount of
* memory used per pixel by every render target, depth, stencil and
* auxiliary surfaces bound to the pipeline.
*/
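/* Illustrative numbers, assuming the 1:256 main-to-CCS ratio of
 * ISL_MAIN_TO_CCS_SIZE_RATIO_XE: a single RGBA8 render target with CCS
 * contributes 4 bytes/pixel for the main surface plus
 * DIV_ROUND_UP(4, 256) = 1 byte for the compression estimate.
 */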
for (uint32_t i = 0; i < gfx->color_att_count; i++) {
const struct anv_attachment *att = &gfx->color_att[i];
if (att->iview) {
const struct anv_image *image = att->iview->image;
const unsigned p = anv_image_aspect_to_plane(image,
VK_IMAGE_ASPECT_COLOR_BIT);
const struct anv_image_plane *plane = &image->planes[p];
pixel_size += intel_calculate_surface_pixel_size(
&plane->primary_surface.isl);
if (isl_aux_usage_has_mcs(att->aux_usage))
pixel_size += intel_calculate_surface_pixel_size(
&plane->aux_surface.isl);
if (isl_aux_usage_has_ccs(att->aux_usage))
pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
&plane->primary_surface.isl),
aux_scale);
}
}
const struct anv_image_view *const z_view = gfx->depth_att.iview;
if (z_view) {
const struct anv_image *image = z_view->image;
assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
const unsigned p = anv_image_aspect_to_plane(image,
VK_IMAGE_ASPECT_DEPTH_BIT);
const struct anv_image_plane *plane = &image->planes[p];
pixel_size += intel_calculate_surface_pixel_size(
&plane->primary_surface.isl);
if (isl_aux_usage_has_hiz(image->planes[p].aux_usage))
pixel_size += intel_calculate_surface_pixel_size(
&plane->aux_surface.isl);
if (isl_aux_usage_has_ccs(image->planes[p].aux_usage))
pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
&plane->primary_surface.isl),
aux_scale);
}
const struct anv_image_view *const s_view = gfx->stencil_att.iview;
if (s_view && s_view != z_view) {
const struct anv_image *image = s_view->image;
assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
const unsigned p = anv_image_aspect_to_plane(image,
VK_IMAGE_ASPECT_STENCIL_BIT);
const struct anv_image_plane *plane = &image->planes[p];
pixel_size += intel_calculate_surface_pixel_size(
&plane->primary_surface.isl);
}
if (!pixel_size)
return false;
/* Compute a tile layout that allows reasonable utilization of the
* tile cache based on the per-pixel cache footprint estimated
* above.
*/
intel_calculate_tile_dimensions(device->info, l3_config,
32, 32, fb_width, fb_height,
pixel_size, tile_width, tile_height);
/* Perform TBIMR tile passes only if the framebuffer covers more
* than a single tile.
*/
return *tile_width < fb_width || *tile_height < fb_height;
}
#endif
#define GET(field) hw_state->field
#define SET(bit, field, value) \
do { \
__typeof(hw_state->field) __v = value; \
if (hw_state->field != __v) { \
hw_state->field = __v; \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} \
} while (0)
#define SET_STAGE(bit, field, value, stage) \
do { \
__typeof(hw_state->field) __v = value; \
if (!anv_pipeline_has_stage(pipeline, \
MESA_SHADER_##stage)) { \
hw_state->field = __v; \
break; \
} \
if (hw_state->field != __v) { \
hw_state->field = __v; \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} \
} while (0)
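/* Example expansion: SET(RASTER, raster.CullMode, CULLMODE_BACK) compares
 * hw_state->raster.CullMode with the new value and, only on a change, stores
 * it and sets ANV_GFX_STATE_RASTER in the dirty bitset so 3DSTATE_RASTER is
 * re-emitted. SET_STAGE() does the same but skips the dirty tracking when
 * the pipeline lacks the given shader stage.
 */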
#define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \
switch (mode) { \
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \
break; \
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 1); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 2); \
break; \
default: \
unreachable("Invalid provoking vertex mode"); \
}
#define SETUP_PROVOKING_VERTEX_FSB(bit, cmd, mode) \
switch (mode) { \
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \
SET(bit, cmd.TriangleStripOddProvokingVertexSelect, 0); \
break; \
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 0); \
SET(bit, cmd.TriangleStripOddProvokingVertexSelect, 1); \
break; \
default: \
unreachable("Invalid provoking vertex mode"); \
}
ALWAYS_INLINE static void
update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (!wm_prog_data)
return;
/* If we have any dynamic bits here, we might need to update the value
* in the push constant for the shader.
*/
if (!brw_wm_prog_data_is_dynamic(wm_prog_data))
return;
enum intel_msaa_flags fs_msaa_flags =
intel_fs_msaa_flags((struct intel_fs_params) {
.shader_sample_shading = wm_prog_data->sample_shading,
.shader_min_sample_shading = pipeline->min_sample_shading,
.state_sample_shading = pipeline->sample_shading_enable,
.rasterization_samples = dyn->ms.rasterization_samples,
.coarse_pixel = !vk_fragment_shading_rate_is_disabled(&dyn->fsr),
.alpha_to_coverage = dyn->ms.alpha_to_coverage_enable,
.provoking_vertex_last = dyn->rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT,
.primitive_id_index = pipeline->primitive_id_index,
});
SET(FS_MSAA_FLAGS, fs_msaa_flags, fs_msaa_flags);
}
ALWAYS_INLINE static void
update_ps(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (!wm_prog_data) {
#if GFX_VER < 20
SET(PS, ps._8PixelDispatchEnable, false);
SET(PS, ps._16PixelDispatchEnable, false);
SET(PS, ps._32PixelDispatchEnable, false);
#else
SET(PS, ps.Kernel0Enable, false);
SET(PS, ps.Kernel1Enable, false);
#endif
return;
}
const struct anv_shader_bin *fs_bin =
pipeline->base.shaders[MESA_SHADER_FRAGMENT];
struct GENX(3DSTATE_PS) ps = {};
intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
MAX2(dyn->ms.rasterization_samples, 1),
hw_state->fs_msaa_flags);
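/* intel_set_ps_dispatch_state() filled the temporary "ps" struct above;
 * the SET() calls below copy the computed kernel offsets and enables into
 * the tracked HW state so that only actual changes dirty 3DSTATE_PS.
 */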
SET(PS, ps.KernelStartPointer0,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
SET(PS, ps.KernelStartPointer1,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
SET(PS, ps.KernelStartPointer2,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
#if GFX_VER < 20
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
#endif
#if GFX_VER < 20
SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable);
SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
#else
SET(PS, ps.Kernel0Enable, ps.Kernel0Enable);
SET(PS, ps.Kernel1Enable, ps.Kernel1Enable);
SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth);
SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth);
SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
SET(PS, ps.Kernel0MaximumPolysperThread, ps.Kernel0MaximumPolysperThread);
#endif
SET(PS, ps.PositionXYOffsetSelect,
!wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
brw_wm_prog_data_is_persample(wm_prog_data,
hw_state->fs_msaa_flags) ?
POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
}
ALWAYS_INLINE static void
update_ps_extra_wm(struct anv_gfx_dynamic_state *hw_state,
const struct anv_graphics_pipeline *pipeline)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (!wm_prog_data)
return;
SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
brw_wm_prog_data_is_persample(wm_prog_data,
hw_state->fs_msaa_flags));
#if GFX_VER >= 11
const bool uses_coarse_pixel =
brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags);
SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel);
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses a
* fragment shading rate that is not constant.
*/
SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, uses_coarse_pixel);
#endif
SET(WM, wm.BarycentricInterpolationMode,
wm_prog_data_barycentric_modes(wm_prog_data, hw_state->fs_msaa_flags));
}
ALWAYS_INLINE static void
update_ps_extra_has_uav(struct anv_gfx_dynamic_state *hw_state,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
/* Force fragment shader execution if occlusion queries are active to
* ensure PS_DEPTH_COUNT is correct. Otherwise a fragment shader with
* discard and no render target setup could incorrectly increment
* PS_DEPTH_COUNT if the HW internally decides not to run the shader
* because it has already established that the depth test is passing.
*/
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
wm_prog_data && (wm_prog_data->has_side_effects ||
gfx->n_occlusion_queries > 0),
FRAGMENT);
}
ALWAYS_INLINE static void
update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
{
struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
wm_prog_data &&
(has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
wm_prog_data->uses_kill),
FRAGMENT);
}
#if GFX_VERx10 >= 125
ALWAYS_INLINE static void
update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
}
#endif
ALWAYS_INLINE static void
update_streamout(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
{
SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
#if INTEL_NEEDS_WA_18022508906
/* Wa_18022508906 :
*
* SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
*
* SOL_INT::Render_Enable =
* (3DSTATE_STREAMOUT::Force_Rendering == Force_On) ||
* (
* (3DSTATE_STREAMOUT::Force_Rendering != Force_Off) &&
* !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
* !3DSTATE_STREAMOUT::API_Render_Disable &&
* (
* 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
* 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
* 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
* 3DSTATE_PS_EXTRA::PS_Valid ||
* 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
* 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
* 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
* )
* )
*
* If SOL_INT::Render_Enable is false, the SO stage will not forward any
* topologies down the pipeline. Which is not what we want for occlusion
* queries.
*
* Here we force rendering to get SOL_INT::Render_Enable when occlusion
* queries are active.
*/
SET(STREAMOUT, so.ForceRendering,
(!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
Force_on : 0);
#endif
}
ALWAYS_INLINE static void
update_provoking_vertex(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
#if GFX_VERx10 >= 200
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
/* In order to respect the table in the Vulkan 1.4.312 spec,
* 28.9. Barycentric Interpolation, we need to program the provoking
* vertex state differently depending on whether we need to set
* vertex_attributes_bypass or not.
* At this point we only deal with full pipelines, so if we don't have
* a wm_prog_data, there is no fragment shader and none of this matters.
*/
if (wm_prog_data && wm_prog_data->vertex_attributes_bypass) {
SETUP_PROVOKING_VERTEX_FSB(SF, sf, dyn->rs.provoking_vertex);
SETUP_PROVOKING_VERTEX_FSB(CLIP, clip, dyn->rs.provoking_vertex);
} else {
/* If we are not setting vertex attributes bypass, we can just use
* the same macro as older generations. There's one bit missing from
* it, but that one is only used for the case above and ignored
* otherwise, so we can pretend it doesn't exist here.
*/
SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);
}
#else
SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);
#endif
switch (dyn->rs.provoking_vertex) {
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
SET(STREAMOUT, so.ReorderMode, LEADING);
SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
break;
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
SET(STREAMOUT, so.ReorderMode, TRAILING);
SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
break;
default:
unreachable("Invalid provoking vertex mode");
}
}
ALWAYS_INLINE static void
update_topology(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
uint32_t topology =
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ?
_3DPRIM_PATCHLIST(dyn->ts.patch_control_points) :
vk_to_intel_primitive_type[dyn->ia.primitive_topology];
SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
}
#if GFX_VER >= 11
ALWAYS_INLINE static void
update_cps(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
#if GFX_VER >= 30
SET(COARSE_PIXEL, coarse_pixel.CPSizeX,
get_cps_size(dyn->fsr.fragment_size.width));
SET(COARSE_PIXEL, coarse_pixel.CPSizeY,
get_cps_size(dyn->fsr.fragment_size.height));
SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner0Opcode,
vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[0]]);
SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner1Opcode,
vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[1]]);
#elif GFX_VER >= 12
SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
get_cps_state_offset(device, &dyn->fsr));
#else
STATIC_ASSERT(GFX_VER == 11);
SET(CPS, cps.CoarsePixelShadingMode, CPS_MODE_CONSTANT);
SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
#endif
}
#endif
ALWAYS_INLINE static void
update_te(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_graphics_pipeline *pipeline)
{
const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);
if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
} else {
/* When the origin is upper-left, we have to flip the winding order */
if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
} else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
} else {
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
}
}
} else {
SET(TE, te.OutputTopology, OUTPUT_POINT);
}
}
ALWAYS_INLINE static void
update_line_width(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
SET(SF, sf.LineWidth, dyn->rs.line.width);
}
ALWAYS_INLINE static void
update_sf_global_depth_bias(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
/**
* From the Vulkan Spec:
*
* "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth bias
* representation is a factor of constant r equal to 1."
*
* From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
*
* "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
*
* Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
*
* Where r is the minimum representable value > 0 in the depth buffer
* format, converted to float32 (note: If state bit Legacy Global Depth
* Bias Enable is set, the r term will be forced to 1.0)"
*
* When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
* LegacyGlobalDepthBiasEnable.
*/
SET(SF, sf.LegacyGlobalDepthBiasEnable,
dyn->rs.depth_bias.representation ==
VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
}
ALWAYS_INLINE static void
update_clip_api_mode(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
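/* APIMODE_OGL clips z against [-w, w] (the negative-one-to-one convention
 * of VK_EXT_depth_clip_control); APIMODE_D3D clips against [0, w].
 */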
SET(CLIP, clip.APIMode,
dyn->vp.depth_clip_negative_one_to_one ?
APIMODE_OGL : APIMODE_D3D);
}
ALWAYS_INLINE static void
update_clip_max_viewport(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with ViewportIndex,
* then the first viewport is used."
*
* This could mean that we might need to set the MaximumVPIndex based on
* the pipeline's last stage, but if the last shader doesn't write the
* viewport index and the VUE header is used, the compiler will force the
* value to 0 (which is what the spec requires above). Otherwise it seems
* like the HW should be pulling 0 if the VUE header is not present.
*
* Avoiding a check on the pipeline seems to prevent additional emissions
* of 3DSTATE_CLIP which appear to impact performance on Assassin's Creed
* Valhalla.
*/
SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
dyn->vp.viewport_count - 1 : 0);
}
ALWAYS_INLINE static void
update_clip_raster(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
{
/* Take the dynamic primitive topology into account with
* 3DSTATE_RASTER::APIMode
* 3DSTATE_RASTER::DXMultisampleRasterizationEnable
* 3DSTATE_RASTER::AntialiasingEnable
*/
uint32_t api_mode = 0;
bool msaa_raster_enable = false;
const VkLineRasterizationModeKHR line_mode =
anv_line_rasterization_mode(dyn->rs.line.mode,
dyn->ms.rasterization_samples);
const VkPolygonMode dynamic_raster_mode =
anv_raster_polygon_mode(pipeline,
dyn->rs.polygon_mode,
dyn->ia.primitive_topology);
anv_rasterization_mode(dynamic_raster_mode,
line_mode, dyn->rs.line.width,
&api_mode, &msaa_raster_enable);
/* From the Broadwell PRM, Volume 2, documentation for 3DSTATE_RASTER,
* "Antialiasing Enable":
*
* "This field must be disabled if any of the render targets have integer
* (UINT or SINT) surface format."
*
* Additionally internal documentation for Gfx12+ states:
*
* "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
* FORCED_SAMPLE_COUNT > 1."
*/
const bool aa_enable =
anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
!gfx->has_uint_rt &&
!(GFX_VER >= 12 && gfx->samples > 1);
const bool depth_clip_enable =
vk_rasterization_state_depth_clip_enable(&dyn->rs);
const bool xy_clip_test_enable =
(dynamic_raster_mode == VK_POLYGON_MODE_FILL);
SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);
SET(RASTER, raster.APIMode, api_mode);
SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
SET(RASTER, raster.AntialiasingEnable, aa_enable);
SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]);
SET(RASTER, raster.FrontWinding, vk_to_intel_front_face[dyn->rs.front_face]);
SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant_factor);
SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope_factor);
SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
SET(RASTER, raster.FrontFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
SET(RASTER, raster.BackFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
SET(RASTER, raster.ConservativeRasterizationEnable,
dyn->rs.conservative_mode !=
VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
#if GFX_VERx10 >= 200
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
SET(RASTER, raster.LegacyBaryAssignmentDisable,
wm_prog_data && wm_prog_data->vertex_attributes_bypass);
#endif
}
ALWAYS_INLINE static void
update_multisample(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
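/* NumberofMultisamples is log2 of the sample count, e.g.
 * __builtin_ffs(8) - 1 == 3 and __builtin_ffs(1) - 1 == 0.
 */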
SET(MULTISAMPLE, ms.NumberofMultisamples,
__builtin_ffs(MAX2(dyn->ms.rasterization_samples, 1)) - 1);
}
ALWAYS_INLINE static void
update_sample_mask(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments.
*
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
*/
SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
}
ALWAYS_INLINE static void
update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct anv_device *device)
{
VkImageAspectFlags ds_aspects = 0;
if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
opt_ds.stencil.front.compare_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
opt_ds.stencil.front.write_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
opt_ds.stencil.front.reference & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
opt_ds.stencil.back.reference & 0xff);
SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
vk_to_intel_compare_op[opt_ds.depth.compare_op]);
SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable,
opt_ds.stencil.write_enable);
SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]);
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]);
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
vk_to_intel_stencil_op[
opt_ds.stencil.front.op.depth_fail]);
SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
vk_to_intel_compare_op[
opt_ds.stencil.front.op.compare]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
vk_to_intel_stencil_op[
opt_ds.stencil.back.op.fail]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
vk_to_intel_stencil_op[
opt_ds.stencil.back.op.pass]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
vk_to_intel_stencil_op[
opt_ds.stencil.back.op.depth_fail]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
vk_to_intel_compare_op[
opt_ds.stencil.back.op.compare]);
#if GFX_VER == 9
const bool pma = want_stencil_pma_fix(dyn, gfx, &opt_ds);
SET(PMA_FIX, pma_fix, pma);
#endif
#if INTEL_WA_18019816803_GFX_VER
if (intel_needs_workaround(device->info, 18019816803)) {
bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
SET(WA_18019816803, ds_write_state, ds_write_state);
}
#endif
}
ALWAYS_INLINE static void
update_depth_bounds(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
/* Only look at updating the bounds if testing is enabled */
if (dyn->ds.depth.bounds_test.enable) {
SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
}
}
ALWAYS_INLINE static void
update_line_stipple(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
{
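/* The HW wants both the repeat count and its reciprocal: a stipple factor
 * of 4 programs LineStippleRepeatCount = 4 and
 * LineStippleInverseRepeatCount = 0.25.
 */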
SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
1.0f / MAX2(1, dyn->rs.line.stipple.factor));
SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);
SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable);
}
ALWAYS_INLINE static void
update_vf_restart(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx)
{
SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
SET(VF, vf.CutIndex, vk_index_to_restart(gfx->index_type));
}
ALWAYS_INLINE static void
update_blend_state(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
struct anv_cmd_graphics_state *gfx,
const struct anv_device *device,
bool has_fs_stage,
bool has_fs_dual_src)
{
const struct anv_instance *instance = device->physical->instance;
const uint8_t color_writes = dyn->cb.color_write_enables;
bool has_writeable_rt =
has_fs_stage &&
!anv_gfx_all_color_write_masked(gfx, dyn);
SET(BLEND_STATE, blend.AlphaToCoverageEnable,
dyn->ms.alpha_to_coverage_enable);
SET(BLEND_STATE, blend.AlphaToOneEnable,
dyn->ms.alpha_to_one_enable);
SET(BLEND_STATE, blend.ColorDitherEnable,
gfx->rendering_flags &
VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT);
bool independent_alpha_blend = false;
/* Wa_14018912822, check if we set these during RT setup. */
bool color_blend_zero = false;
bool alpha_blend_zero = false;
uint32_t rt_0 = MESA_VK_ATTACHMENT_UNUSED;
for (uint32_t rt = 0; rt < MAX_RTS; rt++) {
if (gfx->color_output_mapping[rt] >= gfx->color_att_count)
continue;
uint32_t att = gfx->color_output_mapping[rt];
if (att == 0)
rt_0 = rt;
/* Disable anything above the current number of color attachments. */
bool write_disabled = (color_writes & BITFIELD_BIT(att)) == 0;
SET(BLEND_STATE, blend.rts[rt].WriteDisableAlpha,
write_disabled ||
(dyn->cb.attachments[att].write_mask &
VK_COLOR_COMPONENT_A_BIT) == 0);
SET(BLEND_STATE, blend.rts[rt].WriteDisableRed,
write_disabled ||
(dyn->cb.attachments[att].write_mask &
VK_COLOR_COMPONENT_R_BIT) == 0);
SET(BLEND_STATE, blend.rts[rt].WriteDisableGreen,
write_disabled ||
(dyn->cb.attachments[att].write_mask &
VK_COLOR_COMPONENT_G_BIT) == 0);
SET(BLEND_STATE, blend.rts[rt].WriteDisableBlue,
write_disabled ||
(dyn->cb.attachments[att].write_mask &
VK_COLOR_COMPONENT_B_BIT) == 0);
/* Vulkan specification 1.2.168, VkLogicOp:
*
* "Logical operations are controlled by the logicOpEnable and logicOp
* members of VkPipelineColorBlendStateCreateInfo. If logicOpEnable is
* VK_TRUE, then a logical operation selected by logicOp is applied
* between each color attachment and the fragment’s corresponding
* output value, and blending of all attachments is treated as if it
* were disabled."
*
* From the Broadwell PRM Volume 2d: Command Reference: Structures:
* BLEND_STATE_ENTRY:
*
* "Enabling LogicOp and Color Buffer Blending at the same time is
* UNDEFINED"
*
* The Vulkan spec also says:
* "Logical operations are not applied to floating-point or sRGB format
* color attachments."
* and
* "Any attachments using color formats for which logical operations
* are not supported simply pass through the color values unmodified."
*/
bool ignores_logic_op =
vk_format_is_float(gfx->color_att[att].vk_format) ||
vk_format_is_srgb(gfx->color_att[att].vk_format);
SET(BLEND_STATE, blend.rts[rt].LogicOpFunction,
vk_to_intel_logic_op[dyn->cb.logic_op]);
SET(BLEND_STATE, blend.rts[rt].LogicOpEnable,
dyn->cb.logic_op_enable && !ignores_logic_op);
SET(BLEND_STATE, blend.rts[rt].ColorClampRange, COLORCLAMP_RTFORMAT);
SET(BLEND_STATE, blend.rts[rt].PreBlendColorClampEnable, true);
SET(BLEND_STATE, blend.rts[rt].PostBlendColorClampEnable, true);
#if GFX_VER >= 30
SET(BLEND_STATE, blend.rts[rt].SimpleFloatBlendEnable, true);
#endif
/* Setup blend equation. */
SET(BLEND_STATE, blend.rts[rt].ColorBlendFunction,
vk_to_intel_blend_op[
dyn->cb.attachments[att].color_blend_op]);
SET(BLEND_STATE, blend.rts[rt].AlphaBlendFunction,
vk_to_intel_blend_op[
dyn->cb.attachments[att].alpha_blend_op]);
if (dyn->cb.attachments[att].src_color_blend_factor !=
dyn->cb.attachments[att].src_alpha_blend_factor ||
dyn->cb.attachments[att].dst_color_blend_factor !=
dyn->cb.attachments[att].dst_alpha_blend_factor ||
dyn->cb.attachments[att].color_blend_op !=
dyn->cb.attachments[att].alpha_blend_op)
independent_alpha_blend = true;
/* The Dual Source Blending documentation says:
*
* "If SRC1 is included in a src/dst blend factor and a DualSource RT
* Write message is not used, results are UNDEFINED. (This reflects the
* same restriction in DX APIs, where undefined results are produced if
* “o1” is not written by a PS – there are no default values defined)."
*
* There is no way to gracefully fix this undefined situation so we just
* disable the blending to prevent possible issues.
*/
if (has_fs_stage && !has_fs_dual_src &&
anv_is_dual_src_blend_equation(&dyn->cb.attachments[att])) {
SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, false);
} else {
SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable,
!dyn->cb.logic_op_enable &&
dyn->cb.attachments[att].blend_enable);
}
/* Our hardware applies the blend factor prior to the blend function
* regardless of what function is used. Technically, this means the
* hardware can do MORE than GL or Vulkan specify. However, it also
* means that, for MIN and MAX, we have to stomp the blend factor to ONE
* to make it a no-op.
*/
uint32_t SourceBlendFactor;
uint32_t DestinationBlendFactor;
uint32_t SourceAlphaBlendFactor;
uint32_t DestinationAlphaBlendFactor;
if (dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MIN ||
dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MAX) {
SourceBlendFactor = BLENDFACTOR_ONE;
DestinationBlendFactor = BLENDFACTOR_ONE;
} else {
SourceBlendFactor = vk_to_intel_blend[
dyn->cb.attachments[att].src_color_blend_factor];
DestinationBlendFactor = vk_to_intel_blend[
dyn->cb.attachments[att].dst_color_blend_factor];
}
if (dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MIN ||
dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MAX) {
SourceAlphaBlendFactor = BLENDFACTOR_ONE;
DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
} else {
SourceAlphaBlendFactor = vk_to_intel_blend[
dyn->cb.attachments[att].src_alpha_blend_factor];
DestinationAlphaBlendFactor = vk_to_intel_blend[
dyn->cb.attachments[att].dst_alpha_blend_factor];
}
/* Replace any Src1 blend factor with ONE if dual source blending is not
* enabled.
*/
if (has_fs_stage && !has_fs_dual_src) {
if (is_src1_blend_factor(SourceBlendFactor))
SourceBlendFactor = BLENDFACTOR_ONE;
if (is_src1_blend_factor(DestinationBlendFactor))
DestinationBlendFactor = BLENDFACTOR_ONE;
}
if (instance->intel_enable_wa_14018912822 &&
intel_needs_workaround(device->info, 14018912822) &&
dyn->ms.rasterization_samples > 1) {
if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
color_blend_zero = true;
}
if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
alpha_blend_zero = true;
}
}
SET(BLEND_STATE, blend.rts[rt].SourceBlendFactor, SourceBlendFactor);
SET(BLEND_STATE, blend.rts[rt].DestinationBlendFactor, DestinationBlendFactor);
SET(BLEND_STATE, blend.rts[rt].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
SET(BLEND_STATE, blend.rts[rt].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
}
gfx->color_blend_zero = color_blend_zero;
gfx->alpha_blend_zero = alpha_blend_zero;
SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);
if (rt_0 == MESA_VK_ATTACHMENT_UNUSED)
rt_0 = 0;
/* Program 3DSTATE_PS_BLEND to be consistent with the BLEND_STATE_ENTRY of
* the render target mapped to attachment 0.
*/
SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
SET(PS_BLEND, ps_blend.ColorBufferBlendEnable,
GET(blend.rts[rt_0].ColorBufferBlendEnable));
SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor,
GET(blend.rts[rt_0].SourceAlphaBlendFactor));
SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor,
gfx->alpha_blend_zero ?
BLENDFACTOR_CONST_ALPHA :
GET(blend.rts[rt_0].DestinationAlphaBlendFactor));
SET(PS_BLEND, ps_blend.SourceBlendFactor,
GET(blend.rts[rt_0].SourceBlendFactor));
SET(PS_BLEND, ps_blend.DestinationBlendFactor,
gfx->color_blend_zero ?
BLENDFACTOR_CONST_COLOR :
GET(blend.rts[rt_0].DestinationBlendFactor));
SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable,
GET(blend.IndependentAlphaBlendEnable));
SET(PS_BLEND, ps_blend.AlphaToCoverageEnable,
dyn->ms.alpha_to_coverage_enable);
}
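/* Note: when Wa_14018912822 replaced a BLENDFACTOR_ZERO above,
 * gfx->color_blend_zero / gfx->alpha_blend_zero are set and
 * update_blend_constants() below completes the workaround by forcing the
 * corresponding blend constants to 0.0.
 */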
ALWAYS_INLINE static void
update_blend_constants(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx)
{
SET(CC_STATE, cc.BlendConstantColorRed,
gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]);
SET(CC_STATE, cc.BlendConstantColorGreen,
gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]);
SET(CC_STATE, cc.BlendConstantColorBlue,
gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]);
SET(CC_STATE, cc.BlendConstantColorAlpha,
gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]);
}
ALWAYS_INLINE static void
update_viewports(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
const struct anv_device *device)
{
const struct anv_instance *instance = device->physical->instance;
const VkViewport *viewports = dyn->vp.viewports;
const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
const VkViewport *vp = &viewports[i];
/* The gfx7 state struct has just the matrix and guardband fields, the
* gfx8 struct adds the min/max viewport fields. */
struct GENX(SF_CLIP_VIEWPORT) sfv = {
.ViewportMatrixElementm00 = vp->width / 2,
.ViewportMatrixElementm11 = vp->height / 2,
.ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
.ViewportMatrixElementm30 = vp->x + vp->width / 2,
.ViewportMatrixElementm31 = vp->y + vp->height / 2,
.ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
(vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
.XMinClipGuardband = -1.0f,
.XMaxClipGuardband = 1.0f,
.YMinClipGuardband = -1.0f,
.YMaxClipGuardband = 1.0f,
.XMinViewPort = vp->x,
.XMaxViewPort = vp->x + vp->width - 1,
.YMinViewPort = MIN2(vp->y, vp->y + vp->height),
.YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
};
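/* Worked example: vp = { x=0, y=0, width=1920, height=1080, minDepth=0,
 * maxDepth=1 } with the default [0, 1] depth range gives m00=960, m11=540,
 * m22=1, m30=960, m31=540, m32=0, i.e. the usual NDC-to-window transform.
 */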
/* Fix depth test misrenderings by lowering translated depth range */
if (instance->lower_depth_range_rate != 1.0f)
sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
const uint32_t fb_size_max = 1 << 14;
uint32_t x_min = 0, x_max = fb_size_max;
uint32_t y_min = 0, y_max = fb_size_max;
/* If we have a valid renderArea, include that */
if (gfx->render_area.extent.width > 0 &&
gfx->render_area.extent.height > 0) {
x_min = MAX2(x_min, gfx->render_area.offset.x);
x_max = MIN2(x_max, gfx->render_area.offset.x +
gfx->render_area.extent.width);
y_min = MAX2(y_min, gfx->render_area.offset.y);
y_max = MIN2(y_max, gfx->render_area.offset.y +
gfx->render_area.extent.height);
}
/* The client is required to have enough scissors for whatever it
* sets as ViewportIndex but it's possible that they've got more
* viewports set from a previous command. Also, from the Vulkan 1.3.207
* spec:
*
* "The application must ensure (using scissor if necessary) that
* all rendering is contained within the render area."
*
* If the client doesn't set a scissor, that basically means it
* guarantees everything is in-bounds already. If we end up using a
* guardband of [-1, 1] in that case, there shouldn't be much loss.
* It's theoretically possible that they could do all their clipping
* with clip planes but that'd be a bit odd.
*/
if (i < dyn->vp.scissor_count) {
const VkRect2D *scissor = &dyn->vp.scissors[i];
x_min = MAX2(x_min, scissor->offset.x);
x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
y_min = MAX2(y_min, scissor->offset.y);
y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
}
/* Only bother calculating the guardband if our known render area is
* less than the maximum size. Otherwise, it will calculate [-1, 1]
* anyway but possibly with precision loss.
*/
if (x_min > 0 || x_max < fb_size_max ||
y_min > 0 || y_max < fb_size_max) {
intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
sfv.ViewportMatrixElementm00,
sfv.ViewportMatrixElementm11,
sfv.ViewportMatrixElementm30,
sfv.ViewportMatrixElementm31,
&sfv.XMinClipGuardband,
&sfv.XMaxClipGuardband,
&sfv.YMinClipGuardband,
&sfv.YMaxClipGuardband);
}
#define SET_VP(bit, state, field) \
do { \
if (hw_state->state.field != sfv.field) { \
hw_state->state.field = sfv.field; \
BITSET_SET(hw_state->dirty, \
ANV_GFX_STATE_##bit); \
} \
} while (0)
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
#undef SET_VP
const bool depth_range_unrestricted =
device->vk.enabled_extensions.EXT_depth_range_unrestricted;
float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0f;
float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0f;
float min_depth = dyn->rs.depth_clamp_enable ?
MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit;
float max_depth = dyn->rs.depth_clamp_enable ?
MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit;
if (dyn->rs.depth_clamp_enable &&
dyn->vp.depth_clamp_mode == VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT) {
min_depth = dyn->vp.depth_clamp_range.minDepthClamp;
max_depth = dyn->vp.depth_clamp_range.maxDepthClamp;
}
SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
}
/* If the HW state is already considered dirty or the previously
* programmed viewport count is smaller than what we need, update the
* viewport count and ensure the HW state is dirty. Otherwise, if the
* number of viewports programmed previously was larger than what we need
* now, there is no need to reemit; we can keep the old programmed values.
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
hw_state->vp_cc.count < dyn->vp.viewport_count) {
hw_state->vp_cc.count = dyn->vp.viewport_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
}
}
ALWAYS_INLINE static void
update_scissors(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx,
VkCommandBufferLevel cmd_buffer_level)
{
const VkRect2D *scissors = dyn->vp.scissors;
const VkViewport *viewports = dyn->vp.viewports;
for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
const VkRect2D *s = &scissors[i];
const VkViewport *vp = &viewports[i];
const int max = 0xffff;
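/* SCISSOR_RECT min/max coordinates are unsigned 16-bit fields. */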
uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
uint32_t x_min = MAX2(s->offset.x, vp->x);
int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
MAX2(vp->y, vp->y + vp->height) - 1);
int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
vp->x + vp->width - 1);
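/* Clamp to 2^14 - 1 (16383), the maximum X/Y coordinate of the
* framebuffer (fb_size_max in update_viewports()).
*/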
y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
/* Do this math using int64_t so overflow gets clamped correctly. */
if (cmd_buffer_level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
y_max = CLAMP((uint64_t) y_max, 0,
gfx->render_area.offset.y +
gfx->render_area.extent.height - 1);
x_max = CLAMP((uint64_t) x_max, 0,
gfx->render_area.offset.x +
gfx->render_area.extent.width - 1);
}
if (s->extent.width <= 0 || s->extent.height <= 0) {
/* Since xmax and ymax are inclusive, we have to have xmax < xmin or
* ymax < ymin for empty clips. In case clip x, y, width height are
* all 0, the clamps below produce 0 for xmin, ymin, xmax, ymax,
* which isn't what we want. Just special case empty clips and
* produce a canonical empty clip.
*/
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
} else {
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
}
}
/* Same logic as for viewports: if the HW state is already considered
* dirty or the previously programmed scissor count is smaller than what
* we need, update the scissor count and ensure the HW state is dirty.
* Otherwise, if the number of scissors programmed previously was larger
* than what we need now, there is no need to reemit; we can keep the old
* programmed values.
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
hw_state->scissor.count < dyn->vp.scissor_count) {
hw_state->scissor.count = dyn->vp.scissor_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
}
}
#if GFX_VERx10 == 125
ALWAYS_INLINE static void
update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct anv_cmd_graphics_state *gfx,
const struct intel_l3_config *l3_config)
{
unsigned fb_width, fb_height, tile_width, tile_height;
if (device->physical->instance->enable_tbimr &&
calculate_render_area(gfx, &fb_width, &fb_height) &&
calculate_tile_dimensions(device, gfx, l3_config,
fb_width, fb_height,
&tile_width, &tile_height)) {
/* Use a batch size of 128 polygons per slice, as recommended by BSpec
* 68436 "TBIMR Programming". */
const unsigned num_slices = device->info->num_slices;
const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256;
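/* e.g. num_slices = 8: batch_size = 4 * 256 = 1024 polygons and
* TBIMRBatchSize = log2(1024) - 5 = 5 (the field encodes units of 32
* polygons).
*/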
SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height);
SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width);
SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount,
DIV_ROUND_UP(fb_height, tile_height));
SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount,
DIV_ROUND_UP(fb_width, tile_width));
SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize,
util_logbase2(batch_size) - 5);
SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true);
SET(TBIMR_TILE_PASS_INFO, use_tbimr, true);
} else {
hw_state->use_tbimr = false;
}
}
#endif
/**
* This function takes the Vulkan runtime values & dirty states and updates
* the values in anv_gfx_dynamic_state, flagging HW instructions for
* reemission if the values are changing.
*
* Nothing is emitted in the batch buffer.
*/
static void
cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct vk_dynamic_graphics_state *dyn,
struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline,
VkCommandBufferLevel cmd_buffer_level)
{
UNUSED bool fs_msaa_changed = false;
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
update_fs_msaa_flags(hw_state, dyn, pipeline);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
update_ps(hw_state, device, dyn, pipeline);
update_ps_extra_wm(hw_state, pipeline);
}
if (gfx->dirty &
#if GFX_VERx10 >= 125
ANV_CMD_DIRTY_PIPELINE
#else
(ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)
#endif
)
update_ps_extra_has_uav(hw_state, gfx, pipeline);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE))
update_ps_extra_kills_pixel(hw_state, dyn, gfx, pipeline);
if ((gfx->dirty & ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
update_streamout(hw_state, dyn, gfx, pipeline);
if (
#if GFX_VERx10 >= 200
/* Xe2+ might need to update this if the FS changed */
(gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
#endif
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
update_provoking_vertex(hw_state, dyn, pipeline);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY))
update_topology(hw_state, dyn, pipeline);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
#if GFX_VER >= 11
if (device->vk.enabled_extensions.KHR_fragment_shading_rate &&
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
update_cps(hw_state, device, dyn, pipeline);
#endif /* GFX_VER >= 11 */
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
update_te(hw_state, dyn, pipeline);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
update_line_width(hw_state, dyn);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS))
update_sf_global_depth_bias(hw_state, dyn);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
update_clip_api_mode(hw_state, dyn);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
update_clip_max_viewport(hw_state, dyn);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE))
update_clip_raster(hw_state, dyn, gfx, pipeline);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES))
update_multisample(hw_state, dyn);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK))
update_sample_mask(hw_state, dyn);
if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
#if GFX_VER == 9
/* For the PMA fix */
(gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
#endif
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE))
update_wm_depth_stencil(hw_state, dyn, gfx, device);
#if GFX_VER >= 12
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS))
update_depth_bounds(hw_state, dyn);
#endif
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
update_line_stipple(hw_state, dyn);
if ((gfx->dirty & ANV_CMD_DIRTY_INDEX_TYPE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
update_vf_restart(hw_state, dyn, gfx);
if ((gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER) ||
(gfx->dirty & ANV_CMD_DIRTY_INDEX_TYPE))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
#if GFX_VERx10 >= 125
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
update_vfg_list_cut_index(hw_state, dyn);
#endif
if (device->vk.enabled_extensions.EXT_sample_locations &&
(BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
update_blend_state(hw_state, dyn, gfx, device,
wm_prog_data != NULL,
wm_prog_data != NULL ?
wm_prog_data->dual_src_blend : false);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
update_blend_constants(hw_state, dyn, gfx);
if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLAMP_RANGE))
update_viewports(hw_state, dyn, gfx, device);
if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS))
update_scissors(hw_state, dyn, gfx, cmd_buffer_level);
#if GFX_VERx10 == 125
if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS))
update_tbimr_info(hw_state, device, gfx, pipeline->base.base.l3_config);
#endif
#if INTEL_WA_14018283232_GFX_VER
if (intel_needs_workaround(device->info, 14018283232) &&
((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))) {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
SET(WA_14018283232, wa_14018283232_toggle,
dyn->ds.depth.bounds_test.enable &&
wm_prog_data &&
wm_prog_data->uses_kill);
}
#endif
/* If the pipeline uses a dynamic value of patch_control_points and either
* the pipeline or the dynamic value changed, check the value and reemit
* if needed.
*/
const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
if (tcs_prog_data && tcs_prog_data->input_vertices == 0 &&
((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
}
#undef GET
#undef SET
#undef SET_STAGE
#undef SETUP_PROVOKING_VERTEX
/**
* Entry point taking the Vulkan runtime values & dirty states, updating
* the values in anv_gfx_dynamic_state and flagging HW instructions for
* reemission if the values are changing, then clearing the runtime dirty
* state.
*
* Nothing is emitted in the batch buffer.
*/
void
genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
{
cmd_buffer_flush_gfx_runtime_state(
&cmd_buffer->state.gfx.dyn_state,
cmd_buffer->device,
&cmd_buffer->vk.dynamic_graphics_state,
&cmd_buffer->state.gfx,
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
cmd_buffer->vk.level);
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
}
static void
emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
{
/* For Wa_16012775297, ensure VF_STATISTICS is emitted before 3DSTATE_VF
*/
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
#if GFX_VERx10 >= 125
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
vfg.DistributionMode = RR_STRICT;
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
vf.GeometryDistributionEnable =
cmd_buffer->device->physical->instance->enable_vf_distribution;
}
#endif
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
pr.ReplicaMask = 1;
}
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) {
rr.CullMode = CULLMODE_NONE;
rr.FrontFaceFillMode = FILL_MODE_SOLID;
rr.BackFaceFillMode = FILL_MODE_SOLID;
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);
#if GFX_VER >= 11
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero);
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) {
clip.ClipEnable = true;
clip.ClipMode = CLIPMODE_REJECT_ALL;
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero);
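/* 1 header DWord plus 2 elements of 2 DWords each. Element 0 packs as
* (0, 0, 0, 0) and element 1 as (0, 0, 1, 1), so the dummy draw needs no
* vertex buffer.
*/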
uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2,
GENX(3DSTATE_VERTEX_ELEMENTS));
uint32_t *ve_pack_dest = &vertex_elements[1];
for (int i = 0; i < 2; i++) {
struct GENX(VERTEX_ELEMENT_STATE) element = {
.Valid = true,
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
.Component0Control = VFCOMP_STORE_0,
.Component1Control = VFCOMP_STORE_0,
.Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
.Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element);
ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
topo.PrimitiveTopologyType = _3DPRIM_TRILIST;
}
/* Emit one dummy draw per slice. */
for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.VertexCountPerInstance = 3;
prim.PrimitiveTopologyType = _3DPRIM_TRILIST;
prim.InstanceCount = 1;
prim.VertexAccessType = SEQUENTIAL;
}
}
}
#if INTEL_WA_14018283232_GFX_VER
void
genX(batch_emit_wa_14018283232)(struct anv_batch *batch)
{
anv_batch_emit(batch, GENX(RESOURCE_BARRIER), barrier) {
barrier.ResourceBarrierBody = (struct GENX(RESOURCE_BARRIER_BODY)) {
.BarrierType = RESOURCE_BARRIER_TYPE_IMMEDIATE,
.SignalStage = RESOURCE_BARRIER_STAGE_COLOR,
.WaitStage = RESOURCE_BARRIER_STAGE_PIXEL,
};
}
}
#endif
/**
* This function handles dirty state emission to the batch buffer.
*/
static void
cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_instance *instance = device->physical->instance;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_push_constants *push_consts =
&cmd_buffer->state.gfx.base.push_constants;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
const bool protected = cmd_buffer->vk.pool->flags &
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
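/* With ANV_DEBUG_SHADER_HASH enabled, write the stage's shader source
* hash into the device workaround address using the MI builder, so the
* last programmed shaders can be identified when inspecting the batch.
*/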
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely( \
(instance->debug & ANV_DEBUG_SHADER_HASH) && \
anv_pipeline_has_stage(pipeline, stage))) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(pipeline->base.shaders[stage]-> \
prog_data->source_hash)); \
} \
} while (0)
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
mi_builder_init(&b, device->info, &cmd_buffer->batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
}
#if INTEL_WA_16011107343_GFX_VER
/* Will be emitted in front of every draw instead */
if (intel_needs_workaround(device->info, 16011107343) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
#endif
#if INTEL_WA_22018402687_GFX_VER
/* Will be emitted in front of every draw instead */
if (intel_needs_workaround(device->info, 22018402687) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
#endif
/*
* Values provided by push constants
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) {
push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
gfx->base.push_constants_data_dirty = true;
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
gfx->base.push_constants_data_dirty = true;
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
if (genX(need_wa_16014912113)(&gfx->urb_cfg, &pipeline->urb_cfg)) {
genX(batch_emit_wa_16014912113)(&cmd_buffer->batch,
&gfx->urb_cfg);
}
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
sizeof(struct intel_urb_config));
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);
#if GFX_VER >= 11
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
#endif
if (device->physical->instance->vf_component_packing &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_COMPONENT_PACKING)) {
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
final.vf_component_packing);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
DEBUG_SHADER_HASH(MESA_SHADER_VERTEX);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.vs, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.hs, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.ds, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
vfs.StatisticsEnable = true;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
/* Wa_16011773973:
* If SOL is enabled and SO_DECL state has to be programmed,
* 1. Send 3D State SOL state with SOL disabled
* 2. Send SO_DECL NP state
* 3. Send 3D State SOL with SOL Enabled
*/
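/* Step 1: emit 3DSTATE_STREAMOUT with SOL disabled (all-zero). Step 3 is
* handled in cmd_buffer_flush_gfx_hw_state(), which re-dirties
* ANV_GFX_STATE_STREAMOUT whenever SO_DECL_LIST is emitted with this
* workaround active.
*/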
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb)
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
final.so_decl_list);
#if GFX_VER >= 11 && GFX_VER < 20
/* ICL PRMs, Volume 2a - Command Reference: Instructions,
* 3DSTATE_SO_DECL_LIST:
*
* "Workaround: This command must be followed by a PIPE_CONTROL with
* CS Stall bit set."
*
* On DG2+ also known as Wa_1509820217.
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT);
#endif
}
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) {
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.mesh_control, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) {
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.task_control, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
} else {
assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
}
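/* INIT() builds designated initializers and SET() assignments from the
* values tracked in anv_gfx_dynamic_state, so the instructions packed
* below always reflect the last values computed during the runtime-state
* flush.
*/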
#define INIT(category, name) \
.name = hw_state->category.name
#define SET(s, category, name) \
s.name = hw_state->category.name
/* Now the potentially dynamic instructions */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
DEBUG_SHADER_HASH(MESA_SHADER_FRAGMENT);
anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS),
pipeline, partial.ps, ps, protected) {
SET(ps, ps, KernelStartPointer0);
SET(ps, ps, KernelStartPointer1);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);
#if GFX_VER < 20
SET(ps, ps, KernelStartPointer2);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);
SET(ps, ps, _8PixelDispatchEnable);
SET(ps, ps, _16PixelDispatchEnable);
SET(ps, ps, _32PixelDispatchEnable);
#else
SET(ps, ps, Kernel0Enable);
SET(ps, ps, Kernel1Enable);
SET(ps, ps, Kernel0SIMDWidth);
SET(ps, ps, Kernel1SIMDWidth);
SET(ps, ps, Kernel0PolyPackingPolicy);
SET(ps, ps, Kernel0MaximumPolysperThread);
#endif
SET(ps, ps, PositionXYOffsetSelect);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
pipeline, partial.ps_extra, pse) {
SET(pse, ps_extra, PixelShaderHasUAV);
SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
#endif
SET(pse, ps_extra, PixelShaderKillsPixel);
#if INTEL_WA_18038825448_GFX_VER
/* Add a dependency if either the shader needs it (because of a runtime
* change through a pre-rasterization shader) or if we notice a change.
*/
pse.EnablePSDependencyOnCPsizeChange =
hw_state->ps_extra.EnablePSDependencyOnCPsizeChange ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE);
#elif GFX_VERx10 >= 125
SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange);
#endif
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
pipeline, partial.clip, clip) {
SET(clip, clip, APIMode);
SET(clip, clip, ViewportXYClipTestEnable);
SET(clip, clip, TriangleStripListProvokingVertexSelect);
SET(clip, clip, LineStripListProvokingVertexSelect);
SET(clip, clip, TriangleFanProvokingVertexSelect);
#if GFX_VERx10 >= 200
SET(clip, clip, TriangleStripOddProvokingVertexSelect);
#endif
SET(clip, clip, MaximumVPIndex);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
genX(streamout_prologue)(cmd_buffer);
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
pipeline, partial.so, so) {
SET(so, so, RenderingDisable);
SET(so, so, RenderStreamSelect);
SET(so, so, ReorderMode);
SET(so, so, ForceRendering);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
struct anv_state sf_clip_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->vp_sf_clip.count * 64, 64);
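/* Each SF_CLIP_VIEWPORT entry is 16 DWords (64 bytes), hence the 64-byte
* stride and alignment above.
*/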
for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
struct GENX(SF_CLIP_VIEWPORT) sfv = {
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
INIT(vp_sf_clip.elem[i], XMinClipGuardband),
INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
INIT(vp_sf_clip.elem[i], YMinClipGuardband),
INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
INIT(vp_sf_clip.elem[i], XMinViewPort),
INIT(vp_sf_clip.elem[i], XMaxViewPort),
INIT(vp_sf_clip.elem[i], YMinViewPort),
INIT(vp_sf_clip.elem[i], YMaxViewPort),
};
GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
clip.SFClipViewportPointer = sf_clip_state.offset;
}
}
/* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming
* 3DSTATE_VIEWPORT_STATE_POINTERS_CC:
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
(GFX_VER == 9 &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) {
hw_state->vp_cc.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->vp_cc.count * 8, 32);
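/* CC_VIEWPORT is 2 DWords (8 bytes) per viewport. */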
for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
struct GENX(CC_VIEWPORT) cc_viewport = {
INIT(vp_cc.elem[i], MinimumDepth),
INIT(vp_cc.elem[i], MaximumDepth),
};
GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8,
&cc_viewport);
}
/* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below
*/
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
cc.CCViewportPointer = hw_state->vp_cc.state.offset;
}
cmd_buffer->state.gfx.viewport_set = true;
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
/* Wa_1409725701:
*
* "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
* stored as an array of up to 16 elements. The location of first
* element of the array, as specified by Pointer to SCISSOR_RECT,
* should be aligned to a 64-byte boundary."
*/
struct anv_state scissor_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->scissor.count * 8, 64);
for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
struct GENX(SCISSOR_RECT) scissor = {
INIT(scissor.elem[i], ScissorRectangleYMin),
INIT(scissor.elem[i], ScissorRectangleXMin),
INIT(scissor.elem[i], ScissorRectangleYMax),
INIT(scissor.elem[i], ScissorRectangleXMax),
};
GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
ssp.ScissorRectPointer = scissor_state.offset;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
SET(vft, vft, PrimitiveTopologyType);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
genX(batch_emit_vertex_input)(&cmd_buffer->batch, device,
pipeline, dyn->vi);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
pipeline, partial.te, te) {
SET(te, te, OutputTopology);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
DEBUG_SHADER_HASH(MESA_SHADER_GEOMETRY);
anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS),
pipeline, partial.gs, gs, protected) {
SET(gs, gs, ReorderMode);
}
}
#if GFX_VER >= 30
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) {
coarse_pixel.DisableCPSPointers = true;
SET(coarse_pixel, coarse_pixel, CPSizeX);
SET(coarse_pixel, coarse_pixel, CPSizeY);
SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode);
SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode);
}
}
#else
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
#if GFX_VER == 11
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
SET(cps, cps, CoarsePixelShadingMode);
SET(cps, cps, MinCPSizeX);
SET(cps, cps, MinCPSizeY);
}
#elif GFX_VER >= 12
/* TODO: we can optimize this flush in the following cases:
*
* In the case where the last geometry shader emits a value that is
* not constant, we can avoid this stall because we can synchronize
* the pixel shader internally with
* 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
*
* If we know that the previous pipeline and the current one are
* using the same fragment shading rate.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
pc.PSSStallSyncEnable = true;
#else
pc.PSDSyncEnable = true;
#endif
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
SET(cps, cps, CoarsePixelShadingStateArrayPointer);
}
#endif
}
#endif /* GFX_VER >= 30 */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
pipeline, partial.sf, sf) {
SET(sf, sf, LineWidth);
SET(sf, sf, TriangleStripListProvokingVertexSelect);
SET(sf, sf, LineStripListProvokingVertexSelect);
SET(sf, sf, TriangleFanProvokingVertexSelect);
#if GFX_VERx10 >= 200
SET(sf, sf, TriangleStripOddProvokingVertexSelect);
#endif
SET(sf, sf, LegacyGlobalDepthBiasEnable);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), raster) {
/* For details on 3DSTATE_RASTER multisample state, see the BSpec
* table "Multisample Modes State".
*
* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix
* computations. If we ever set this bit to a different value, they
* will need to be updated accordingly.
*/
raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
raster.ForceMultisampling = false;
raster.ScissorRectangleEnable = true;
SET(raster, raster, APIMode);
SET(raster, raster, DXMultisampleRasterizationEnable);
SET(raster, raster, AntialiasingEnable);
SET(raster, raster, CullMode);
SET(raster, raster, FrontWinding);
SET(raster, raster, GlobalDepthOffsetEnableSolid);
SET(raster, raster, GlobalDepthOffsetEnableWireframe);
SET(raster, raster, GlobalDepthOffsetEnablePoint);
SET(raster, raster, GlobalDepthOffsetConstant);
SET(raster, raster, GlobalDepthOffsetScale);
SET(raster, raster, GlobalDepthOffsetClamp);
SET(raster, raster, FrontFaceFillMode);
SET(raster, raster, BackFaceFillMode);
SET(raster, raster, ViewportZFarClipTestEnable);
SET(raster, raster, ViewportZNearClipTestEnable);
SET(raster, raster, ConservativeRasterizationEnable);
#if GFX_VERx10 >= 200
SET(raster, raster, LegacyBaryAssignmentDisable);
#endif
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
ms.PixelLocation = CENTER;
/* The PRM says that this bit is valid only for DX9:
*
* SW can choose to set this bit only for DX9 API. DX10/OGL API's
* should not have any effect by setting or not setting this bit.
*/
ms.PixelPositionOffsetEnable = false;
SET(ms, ms, NumberofMultisamples);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
hw_state->cc.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
INIT(cc, BlendConstantColorRed),
INIT(cc, BlendConstantColorGreen),
INIT(cc, BlendConstantColorBlue),
INIT(cc, BlendConstantColorAlpha),
};
GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc);
/* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below
*/
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = hw_state->cc.state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
SET(sm, sm, SampleMask);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
SET(ds, ds, DoubleSidedStencilEnable);
SET(ds, ds, StencilTestMask);
SET(ds, ds, StencilWriteMask);
SET(ds, ds, BackfaceStencilTestMask);
SET(ds, ds, BackfaceStencilWriteMask);
SET(ds, ds, StencilReferenceValue);
SET(ds, ds, BackfaceStencilReferenceValue);
SET(ds, ds, DepthTestEnable);
SET(ds, ds, DepthBufferWriteEnable);
SET(ds, ds, DepthTestFunction);
SET(ds, ds, StencilTestEnable);
SET(ds, ds, StencilBufferWriteEnable);
SET(ds, ds, StencilFailOp);
SET(ds, ds, StencilPassDepthPassOp);
SET(ds, ds, StencilPassDepthFailOp);
SET(ds, ds, StencilTestFunction);
SET(ds, ds, BackfaceStencilFailOp);
SET(ds, ds, BackfaceStencilPassDepthPassOp);
SET(ds, ds, BackfaceStencilPassDepthFailOp);
SET(ds, ds, BackfaceStencilTestFunction);
}
}
#if GFX_VER >= 12
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
SET(db, db, DepthBoundsTestEnable);
SET(db, db, DepthBoundsTestMinValue);
SET(db, db, DepthBoundsTestMaxValue);
}
}
#endif
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
SET(ls, ls, LineStipplePattern);
SET(ls, ls, LineStippleInverseRepeatCount);
SET(ls, ls, LineStippleRepeatCount);
}
#if GFX_VER >= 11
/* ICL PRMs, Volume 2a - Command Reference: Instructions,
* 3DSTATE_LINE_STIPPLE:
*
* "Workaround: This command must be followed by a PIPE_CONTROL with
* CS Stall bit set."
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT);
#endif
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
vf.GeometryDistributionEnable =
device->physical->instance->enable_vf_distribution;
#endif
vf.ComponentPackingEnable =
device->physical->instance->vf_component_packing;
SET(vf, vf, IndexedDrawCutIndexEnable);
SET(vf, vf, CutIndex);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = vk_to_intel_index_type(gfx->index_type);
ib.MOCS = gfx->index_addr == 0 ?
anv_mocs(cmd_buffer->device, NULL, ISL_SURF_USAGE_INDEX_BUFFER_BIT) :
gfx->index_mocs;
#if GFX_VER >= 12
ib.L3BypassDisable = true;
#endif
ib.BufferStartingAddress = anv_address_from_u64(gfx->index_addr);
ib.BufferSize = gfx->index_size;
}
}
#if GFX_VERx10 >= 125
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
pipeline, partial.vfg, vfg) {
SET(vfg, vfg, ListCutIndexEnable);
}
}
#endif
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
genX(emit_sample_pattern)(&cmd_buffer->batch,
dyn->ms.sample_locations_enable ?
dyn->ms.sample_locations : NULL);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
pipeline, partial.wm, wm) {
SET(wm, wm, LineStippleEnable);
SET(wm, wm, BarycentricInterpolationMode);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
SET(blend, ps_blend, HasWriteableRT);
SET(blend, ps_blend, ColorBufferBlendEnable);
SET(blend, ps_blend, SourceAlphaBlendFactor);
SET(blend, ps_blend, DestinationAlphaBlendFactor);
SET(blend, ps_blend, SourceBlendFactor);
SET(blend, ps_blend, DestinationBlendFactor);
SET(blend, ps_blend, AlphaTestEnable);
SET(blend, ps_blend, IndependentAlphaBlendEnable);
SET(blend, ps_blend, AlphaToCoverageEnable);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
hw_state->blend.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
num_dwords * 4,
64);
uint32_t *dws = hw_state->blend.state.map;
struct GENX(BLEND_STATE) blend_state = {
INIT(blend, AlphaToCoverageEnable),
INIT(blend, AlphaToOneEnable),
INIT(blend, IndependentAlphaBlendEnable),
INIT(blend, ColorDitherEnable),
};
GENX(BLEND_STATE_pack)(NULL, dws, &blend_state);
/* Jump to blend entries. */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
struct GENX(BLEND_STATE_ENTRY) entry = {
INIT(blend.rts[i], WriteDisableAlpha),
INIT(blend.rts[i], WriteDisableRed),
INIT(blend.rts[i], WriteDisableGreen),
INIT(blend.rts[i], WriteDisableBlue),
INIT(blend.rts[i], LogicOpFunction),
INIT(blend.rts[i], LogicOpEnable),
INIT(blend.rts[i], ColorBufferBlendEnable),
INIT(blend.rts[i], ColorClampRange),
#if GFX_VER >= 30
INIT(blend.rts[i], SimpleFloatBlendEnable),
#endif
INIT(blend.rts[i], PreBlendColorClampEnable),
INIT(blend.rts[i], PostBlendColorClampEnable),
INIT(blend.rts[i], SourceBlendFactor),
INIT(blend.rts[i], DestinationBlendFactor),
INIT(blend.rts[i], ColorBlendFunction),
INIT(blend.rts[i], SourceAlphaBlendFactor),
INIT(blend.rts[i], DestinationAlphaBlendFactor),
INIT(blend.rts[i], AlphaBlendFunction),
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
/* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = hw_state->blend.state.offset;
bsp.BlendStatePointerValid = true;
}
}
#if INTEL_WA_18019816803_GFX_VER
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_PSS_STALL_SYNC_BIT);
}
#endif
#if INTEL_WA_14018283232_GFX_VER
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232))
genX(batch_emit_wa_14018283232)(&cmd_buffer->batch);
#endif
#if GFX_VER == 9
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
#endif
#if GFX_VERx10 >= 125
if (hw_state->use_tbimr &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO),
tbimr) {
SET(tbimr, tbimr, TileRectangleHeight);
SET(tbimr, tbimr, TileRectangleWidth);
SET(tbimr, tbimr, VerticalTileCount);
SET(tbimr, tbimr, HorizontalTileCount);
SET(tbimr, tbimr, TBIMRBatchSize);
SET(tbimr, tbimr, TileBoxCheck);
}
}
#endif
#undef INIT
#undef SET
#undef DEBUG_SHADER_HASH
BITSET_ZERO(hw_state->dirty);
}
/**
* This function handles possible state workarounds and emits the dirty
* instructions to the batch buffer.
*/
void
genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
if (INTEL_DEBUG(DEBUG_REEMIT)) {
BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty,
device->gfx_dirty_state);
}
/*
* Put potential workarounds here if you need to reemit an instruction
* because another one is changing.
*/
/* Reprogram SF_CLIP & CC_STATE together. This reproduces the programming
* done on Windows drivers. Fixes flickering issues with multiple
* workloads.
*
* Since blorp disables 3DSTATE_CLIP::ClipEnable and dirties CC_STATE, this
* also takes care of Wa_14016820455 which requires SF_CLIP to be
* reprogrammed whenever 3DSTATE_CLIP::ClipEnable is enabled.
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
}
/* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */
#if INTEL_WA_16012775297_GFX_VER
if (intel_needs_workaround(device->info, 16012775297) &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
#endif
/* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
* it after.
*/
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
}
#if INTEL_WA_18038825448_GFX_VER
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data) {
genX(cmd_buffer_set_coarse_pixel_active)(
cmd_buffer,
brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags));
}
#endif
/* Gfx11 undocumented issue:
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781
*/
#if GFX_VER == 11
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
#endif
/* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */
if (intel_needs_workaround(device->info, 18020335297) &&
(BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) &&
cmd_buffer->state.gfx.viewport_set) {
/* For mesh, we implement the WA using CS stall. This is for
* simplicity and takes care of possible interaction with Wa_16014390852.
*/
if (anv_pipeline_is_mesh(pipeline)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
_3D, ANV_PIPE_CS_STALL_BIT);
} else {
/* Mask off all instructions that we program. */
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE);
BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
cmd_buffer_gfx_state_emission(cmd_buffer);
emit_wa_18020335297_dummy_draw(cmd_buffer);
/* Dirty all emitted WA state to make sure that current real
* state is restored.
*/
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
}
}
cmd_buffer_gfx_state_emission(cmd_buffer);
}
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
if (!anv_cmd_buffer_is_render_queue(cmd_buffer))
return;
if (cmd_buffer->state.gfx.pma_fix_enabled == enable)
return;
cmd_buffer->state.gfx.pma_fix_enabled = enable;
/* According to the Broadwell PIPE_CONTROL documentation, software should
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
* prior to the LRI. If stencil buffer writes are enabled, then a Render
* Cache Flush is also necessary.
*
* The Skylake docs say to use a depth stall rather than a command
* streamer stall. However, the hardware seems to violently disagree.
* A full command streamer stall seems to be needed in both cases.
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT |
#if GFX_VER >= 12
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
#if GFX_VER == 9
uint32_t cache_mode;
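/* CACHE_MODE_0 is a masked register: bits 31:16 select which of bits
* 15:0 actually get written, which is what the ...Mask field sets below.
*/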
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
.STCPMAOptimizationEnable = enable,
.STCPMAOptimizationEnableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_0_num);
lri.DataDWord = cache_mode;
}
#endif /* GFX_VER == 9 */
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
*
* Again, the Skylake docs give a different set of flushes but the BDW
* flushes seem to work just as well.
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_STALL_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
#if GFX_VER >= 12
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
}