| /* |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include <assert.h> |
| #include <stdbool.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| |
| #include "anv_private.h" |
| |
| #include "genxml/gen_macros.h" |
| #include "genxml/genX_pack.h" |
| #include "common/intel_genX_state_brw.h" |
| #include "common/intel_guardband.h" |
| #include "common/intel_tiled_render.h" |
| #include "compiler/brw_prim.h" |
| |
| #include "genX_mi_builder.h" |
| |
| static const uint32_t vk_to_intel_blend[] = { |
| [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, |
| [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, |
| [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, |
| [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, |
| [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, |
| [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, |
| [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, |
| [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, |
| [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, |
| [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, |
| [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, |
| [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, |
| [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, |
| [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, |
| [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, |
| [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, |
| [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, |
| [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, |
| [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, |
| }; |
| |
| static const uint32_t vk_to_intel_blend_op[] = { |
| [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, |
| [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, |
| [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, |
| [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, |
| [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, |
| }; |
| |
| static const uint32_t vk_to_intel_cullmode[] = { |
| [VK_CULL_MODE_NONE] = CULLMODE_NONE, |
| [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, |
| [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, |
| [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH |
| }; |
| |
| static const uint32_t vk_to_intel_fillmode[] = { |
| [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, |
| [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, |
| [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, |
| }; |
| |
| static const uint32_t vk_to_intel_front_face[] = { |
| [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, |
| [VK_FRONT_FACE_CLOCKWISE] = 0 |
| }; |
| |
| static const uint32_t vk_to_intel_logic_op[] = { |
| [VK_LOGIC_OP_COPY] = LOGICOP_COPY, |
| [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, |
| [VK_LOGIC_OP_AND] = LOGICOP_AND, |
| [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, |
| [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, |
| [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, |
| [VK_LOGIC_OP_XOR] = LOGICOP_XOR, |
| [VK_LOGIC_OP_OR] = LOGICOP_OR, |
| [VK_LOGIC_OP_NOR] = LOGICOP_NOR, |
| [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, |
| [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, |
| [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, |
| [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, |
| [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, |
| [VK_LOGIC_OP_NAND] = LOGICOP_NAND, |
| [VK_LOGIC_OP_SET] = LOGICOP_SET, |
| }; |
| |
| static const uint32_t vk_to_intel_compare_op[] = { |
| [VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER, |
| [VK_COMPARE_OP_LESS] = PREFILTEROP_LESS, |
| [VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL, |
| [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL, |
| [VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER, |
| [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL, |
| [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL, |
| [VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS, |
| }; |
| |
| static const uint32_t vk_to_intel_stencil_op[] = { |
| [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, |
| [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, |
| [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, |
| [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, |
| [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, |
| [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, |
| [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, |
| [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, |
| }; |
| |
| static const uint32_t vk_to_intel_primitive_type[] = { |
| [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, |
| }; |
| |
| static uint32_t vk_to_intel_index_type(VkIndexType type) |
| { |
| switch (type) { |
| case VK_INDEX_TYPE_UINT8_KHR: |
| return INDEX_BYTE; |
| case VK_INDEX_TYPE_UINT16: |
| return INDEX_WORD; |
| case VK_INDEX_TYPE_UINT32: |
| return INDEX_DWORD; |
| default: |
| unreachable("invalid index type"); |
| } |
| } |
| |
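/* Wa_16014912113: emit a URB allocation derived from @urb_cfg with 256 VS
 * entries, followed by an HDC pipeline flush. This comment only summarizes
 * the code below; the exact conditions requiring the workaround live in
 * the workaround database.
 */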
| void |
| genX(batch_emit_wa_16014912113)(struct anv_batch *batch, |
| const struct intel_urb_config *urb_cfg) |
| { |
| #if INTEL_NEEDS_WA_16014912113 |
| if (urb_cfg->size[0] == 0) |
| return; |
| |
| for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { |
| #if GFX_VER >= 12 |
| anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) { |
| urb._3DCommandSubOpcode += i; |
| urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1; |
| urb.VSURBStartingAddressSlice0 = urb_cfg->start[i]; |
| urb.VSURBStartingAddressSliceN = urb_cfg->start[i]; |
| urb.VSNumberofURBEntriesSlice0 = i == 0 ? 256 : 0; |
| urb.VSNumberofURBEntriesSliceN = i == 0 ? 256 : 0; |
| } |
| #else |
| anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) { |
| urb._3DCommandSubOpcode += i; |
| urb.VSURBStartingAddress = urb_cfg->start[i]; |
| urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1; |
| urb.VSNumberofURBEntries = i == 0 ? 256 : 0; |
| } |
| #endif |
| } |
| anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { |
| pc.HDCPipelineFlushEnable = true; |
| } |
| #endif |
| } |
| |
| static void |
| genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer) |
| { |
| #if INTEL_WA_16013994831_GFX_VER |
   /* Wa_16013994831 - Disable preemption during streamout and re-enable it
    * if XFB is not used by the current pipeline.
    */
| if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831)) |
| return; |
| |
| struct anv_graphics_pipeline *pipeline = |
| anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline); |
| if (pipeline->uses_xfb) { |
| genX(cmd_buffer_set_preemption)(cmd_buffer, false); |
| return; |
| } |
| |
| if (!cmd_buffer->state.gfx.object_preemption) |
| genX(cmd_buffer_set_preemption)(cmd_buffer, true); |
| #endif |
| } |
| |
| #if GFX_VER >= 12 && GFX_VER < 30 |
| static uint32_t |
| get_cps_state_offset(const struct anv_device *device, |
| const struct vk_fragment_shading_rate_state *fsr) |
| { |
| uint32_t offset; |
| static const uint32_t size_index[] = { |
| [1] = 0, |
| [2] = 1, |
| [4] = 2, |
| }; |
| |
| #if GFX_VERx10 >= 125 |
| offset = |
| 1 + /* skip disabled */ |
| fsr->combiner_ops[0] * 5 * 3 * 3 + |
| fsr->combiner_ops[1] * 3 * 3 + |
| size_index[fsr->fragment_size.width] * 3 + |
| size_index[fsr->fragment_size.height]; |
| #else |
| offset = |
| 1 + /* skip disabled */ |
| size_index[fsr->fragment_size.width] * 3 + |
| size_index[fsr->fragment_size.height]; |
| #endif |
| |
| offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4; |
| |
| return device->cps_states.offset + offset; |
| } |
| #endif /* GFX_VER >= 12 && GFX_VER < 30 */ |
| |
| #if GFX_VER >= 30 |
| static uint32_t |
| get_cps_size(uint32_t size) |
| { |
| switch (size) { |
| case 1: |
| return CPSIZE_1; |
| case 2: |
| return CPSIZE_2; |
| case 4: |
| return CPSIZE_4; |
| default: |
| unreachable("Invalid size"); |
| } |
| } |
| |
| static const uint32_t vk_to_intel_shading_rate_combiner_op[] = { |
| [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = CPS_COMB_OP_PASSTHROUGH, |
| [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = CPS_COMB_OP_OVERRIDE, |
| [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = CPS_COMB_OP_HIGH_QUALITY, |
| [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = CPS_COMB_OP_LOW_QUALITY, |
| [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = CPS_COMB_OP_RELATIVE, |
| }; |
| #endif |
| |
| static bool |
| has_ds_feedback_loop(const struct anv_pipeline_bind_map *bind_map, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| if (BITSET_IS_EMPTY(bind_map->input_attachments)) |
| return false; |
| |
| const unsigned depth_att = dyn->ial.depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? |
| MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.depth_att; |
| const unsigned stencil_att = dyn->ial.stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? |
| MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.stencil_att; |
| |
| return |
| (dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT)) != 0 || |
| (dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED && |
| BITSET_TEST(bind_map->input_attachments, depth_att)) || |
| (dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED && |
| BITSET_TEST(bind_map->input_attachments, stencil_att)); |
| } |
| |
| UNUSED static bool |
| want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct vk_depth_stencil_state *ds) |
| { |
| if (GFX_VER > 9) |
| return false; |
| assert(GFX_VER == 9); |
| |
| /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable: |
| * |
| * Clearing this bit will force the STC cache to wait for pending |
| * retirement of pixels at the HZ-read stage and do the STC-test for |
| * Non-promoted, R-computed and Computed depth modes instead of |
| * postponing the STC-test to RCPFE. |
| * |
| * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && |
| * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable |
| * |
| * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && |
| * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && |
| * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) |
| * |
| * COMP_STC_EN = STC_TEST_EN && |
| * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil |
| * |
| * SW parses the pipeline states to generate the following logical |
| * signal indicating if PMA FIX can be enabled. |
| * |
| * STC_PMA_OPT = |
| * 3DSTATE_WM::ForceThreadDispatch != 1 && |
| * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) && |
| * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && |
| * 3DSTATE_DEPTH_BUFFER::HIZ Enable && |
| * !(3DSTATE_WM::EDSC_Mode == 2) && |
| * 3DSTATE_PS_EXTRA::PixelShaderValid && |
| * !(3DSTATE_WM_HZ_OP::DepthBufferClear || |
| * 3DSTATE_WM_HZ_OP::DepthBufferResolve || |
| * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || |
| * 3DSTATE_WM_HZ_OP::StencilBufferClear) && |
| * (COMP_STC_EN || STC_WRITE_EN) && |
| * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || |
| * 3DSTATE_WM::ForceKillPix == ON || |
| * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || |
| * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || |
| * 3DSTATE_PS_BLEND::AlphaTestEnable || |
| * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || |
| * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) |
| */ |
| |
| /* These are always true: |
| * 3DSTATE_WM::ForceThreadDispatch != 1 && |
| * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) |
| */ |
| |
| /* We only enable the PMA fix if we know for certain that HiZ is enabled. |
| * If we don't know whether HiZ is enabled or not, we disable the PMA fix |
| * and there is no harm. |
| * |
| * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && |
| * 3DSTATE_DEPTH_BUFFER::HIZ Enable |
| */ |
| if (!gfx->hiz_enabled) |
| return false; |
| |
| /* We can't possibly know if HiZ is enabled without the depth attachment */ |
| ASSERTED const struct anv_image_view *d_iview = gfx->depth_att.iview; |
| assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ); |
| |
| /* 3DSTATE_PS_EXTRA::PixelShaderValid */ |
| struct anv_graphics_pipeline *pipeline = |
| anv_pipeline_to_graphics(gfx->base.pipeline); |
| if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) |
| return false; |
| |
| /* !(3DSTATE_WM::EDSC_Mode == 2) */ |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| if (wm_prog_data->early_fragment_tests) |
| return false; |
| |
| /* We never use anv_pipeline for HiZ ops so this is trivially true: |
| * !(3DSTATE_WM_HZ_OP::DepthBufferClear || |
| * 3DSTATE_WM_HZ_OP::DepthBufferResolve || |
| * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || |
| * 3DSTATE_WM_HZ_OP::StencilBufferClear) |
| */ |
| |
| /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && |
| * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable |
| */ |
| const bool stc_test_en = ds->stencil.test_enable; |
| |
| /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && |
| * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && |
| * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) |
| */ |
| const bool stc_write_en = ds->stencil.write_enable; |
| |
| /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ |
| const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil; |
| |
| /* COMP_STC_EN || STC_WRITE_EN */ |
| if (!(comp_stc_en || stc_write_en)) |
| return false; |
| |
| /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || |
| * 3DSTATE_WM::ForceKillPix == ON || |
| * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || |
| * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || |
| * 3DSTATE_PS_BLEND::AlphaTestEnable || |
| * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || |
| * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF) |
| */ |
| struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| |
| return pipeline->kill_pixel || |
| has_ds_feedback_loop(&fs_bin->bind_map, dyn) || |
| wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; |
| } |
| |
| static inline bool |
| anv_rasterization_aa_mode(VkPolygonMode raster_mode, |
| VkLineRasterizationModeKHR line_mode) |
| { |
| if (raster_mode == VK_POLYGON_MODE_LINE && |
| line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR) |
| return true; |
| return false; |
| } |
| |
| static inline VkLineRasterizationModeKHR |
| anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode, |
| unsigned rasterization_samples) |
| { |
| if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) { |
| if (rasterization_samples > 1) { |
| return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR; |
| } else { |
| return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR; |
| } |
| } |
| return line_mode; |
| } |
| |
| /** Returns the final polygon mode for rasterization |
| * |
| * This function takes into account polygon mode, primitive topology and the |
| * different shader stages which might generate their own type of primitives. |
| */ |
| static inline VkPolygonMode |
| anv_raster_polygon_mode(const struct anv_graphics_pipeline *pipeline, |
| VkPolygonMode polygon_mode, |
| VkPrimitiveTopology primitive_topology) |
| { |
| if (anv_pipeline_is_mesh(pipeline)) { |
| switch (get_mesh_prog_data(pipeline)->primitive_type) { |
| case MESA_PRIM_POINTS: |
| return VK_POLYGON_MODE_POINT; |
| case MESA_PRIM_LINES: |
| return VK_POLYGON_MODE_LINE; |
| case MESA_PRIM_TRIANGLES: |
| return polygon_mode; |
| default: |
| unreachable("invalid primitive type for mesh"); |
| } |
| } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { |
| switch (get_gs_prog_data(pipeline)->output_topology) { |
| case _3DPRIM_POINTLIST: |
| return VK_POLYGON_MODE_POINT; |
| |
| case _3DPRIM_LINELIST: |
| case _3DPRIM_LINESTRIP: |
| case _3DPRIM_LINELOOP: |
| return VK_POLYGON_MODE_LINE; |
| |
| case _3DPRIM_TRILIST: |
| case _3DPRIM_TRIFAN: |
| case _3DPRIM_TRISTRIP: |
| case _3DPRIM_RECTLIST: |
| case _3DPRIM_QUADLIST: |
| case _3DPRIM_QUADSTRIP: |
| case _3DPRIM_POLYGON: |
| return polygon_mode; |
| } |
| unreachable("Unsupported GS output topology"); |
| } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { |
| switch (get_tes_prog_data(pipeline)->output_topology) { |
| case INTEL_TESS_OUTPUT_TOPOLOGY_POINT: |
| return VK_POLYGON_MODE_POINT; |
| |
| case INTEL_TESS_OUTPUT_TOPOLOGY_LINE: |
| return VK_POLYGON_MODE_LINE; |
| |
| case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW: |
| case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW: |
| return polygon_mode; |
| } |
      unreachable("Unsupported TES output topology");
| } else { |
| switch (primitive_topology) { |
| case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: |
| return VK_POLYGON_MODE_POINT; |
| |
| case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: |
| case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: |
| case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: |
| case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: |
| return VK_POLYGON_MODE_LINE; |
| |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: |
| return polygon_mode; |
| |
| default: |
| unreachable("Unsupported primitive topology"); |
| } |
| } |
| } |
| |
| static inline bool |
| anv_is_dual_src_blend_factor(VkBlendFactor factor) |
| { |
| return factor == VK_BLEND_FACTOR_SRC1_COLOR || |
| factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR || |
| factor == VK_BLEND_FACTOR_SRC1_ALPHA || |
| factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; |
| } |
| |
| static inline bool |
| anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb) |
| { |
| return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) && |
| anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) && |
| anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) && |
| anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor); |
| } |
| |
| static void |
| anv_rasterization_mode(VkPolygonMode raster_mode, |
| VkLineRasterizationModeKHR line_mode, |
| float line_width, |
| uint32_t *api_mode, |
| bool *msaa_rasterization_enable) |
| { |
| if (raster_mode == VK_POLYGON_MODE_LINE) { |
| /* Unfortunately, configuring our line rasterization hardware on gfx8 |
| * and later is rather painful. Instead of giving us bits to tell the |
| * hardware what line mode to use like we had on gfx7, we now have an |
| * arcane combination of API Mode and MSAA enable bits which do things |
| * in a table which are expected to magically put the hardware into the |
       * right mode for your API. Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of, so nothing works the way you want it to.
| * |
| * Look at the table titled "Multisample Rasterization Modes" in Vol 7 |
| * of the Skylake PRM for more details. |
| */ |
| switch (line_mode) { |
| case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT: |
| *api_mode = DX101; |
| #if GFX_VER <= 9 |
| /* Prior to ICL, the algorithm the HW uses to draw wide lines |
| * doesn't quite match what the CTS expects, at least for rectangular |
| * lines, so we set this to false here, making it draw parallelograms |
| * instead, which work well enough. |
| */ |
| *msaa_rasterization_enable = line_width < 1.0078125; |
| #else |
| *msaa_rasterization_enable = true; |
| #endif |
| break; |
| |
| case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT: |
| case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT: |
| *api_mode = DX9OGL; |
| *msaa_rasterization_enable = false; |
| break; |
| |
| default: |
| unreachable("Unsupported line rasterization mode"); |
| } |
| } else { |
| *api_mode = DX101; |
| *msaa_rasterization_enable = true; |
| } |
| } |
| |
| static bool |
| is_src1_blend_factor(enum GENX(3D_Color_Buffer_Blend_Factor) factor) |
| { |
| return factor == BLENDFACTOR_SRC1_COLOR || |
| factor == BLENDFACTOR_SRC1_ALPHA || |
| factor == BLENDFACTOR_INV_SRC1_COLOR || |
| factor == BLENDFACTOR_INV_SRC1_ALPHA; |
| } |
| |
| #if GFX_VERx10 == 125 |
| /** |
| * Return the dimensions of the current rendering area, defined as the |
| * bounding box of all present color, depth and stencil attachments. |
| */ |
| UNUSED static bool |
| calculate_render_area(const struct anv_cmd_graphics_state *gfx, |
| unsigned *width, unsigned *height) |
| { |
| *width = gfx->render_area.offset.x + gfx->render_area.extent.width; |
| *height = gfx->render_area.offset.y + gfx->render_area.extent.height; |
| |
| for (unsigned i = 0; i < gfx->color_att_count; i++) { |
| const struct anv_attachment *att = &gfx->color_att[i]; |
| if (att->iview) { |
| *width = MAX2(*width, att->iview->vk.extent.width); |
| *height = MAX2(*height, att->iview->vk.extent.height); |
| } |
| } |
| |
| const struct anv_image_view *const z_view = gfx->depth_att.iview; |
| if (z_view) { |
| *width = MAX2(*width, z_view->vk.extent.width); |
| *height = MAX2(*height, z_view->vk.extent.height); |
| } |
| |
| const struct anv_image_view *const s_view = gfx->stencil_att.iview; |
| if (s_view) { |
| *width = MAX2(*width, s_view->vk.extent.width); |
| *height = MAX2(*height, s_view->vk.extent.height); |
| } |
| |
| return *width && *height; |
| } |
| |
| /* Calculate TBIMR tiling parameters adequate for the current pipeline |
| * setup. Return true if TBIMR should be enabled. |
| */ |
| UNUSED static bool |
| calculate_tile_dimensions(const struct anv_device *device, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct intel_l3_config *l3_config, |
| unsigned fb_width, unsigned fb_height, |
| unsigned *tile_width, unsigned *tile_height) |
| { |
| assert(GFX_VER == 12); |
| const unsigned aux_scale = ISL_MAIN_TO_CCS_SIZE_RATIO_XE; |
| |
| unsigned pixel_size = 0; |
| |
| /* Perform a rough calculation of the tile cache footprint of the |
| * pixel pipeline, approximating it as the sum of the amount of |
| * memory used per pixel by every render target, depth, stencil and |
| * auxiliary surfaces bound to the pipeline. |
| */ |
| for (uint32_t i = 0; i < gfx->color_att_count; i++) { |
| const struct anv_attachment *att = &gfx->color_att[i]; |
| |
| if (att->iview) { |
| const struct anv_image *image = att->iview->image; |
| const unsigned p = anv_image_aspect_to_plane(image, |
| VK_IMAGE_ASPECT_COLOR_BIT); |
| const struct anv_image_plane *plane = &image->planes[p]; |
| |
| pixel_size += intel_calculate_surface_pixel_size( |
| &plane->primary_surface.isl); |
| |
| if (isl_aux_usage_has_mcs(att->aux_usage)) |
| pixel_size += intel_calculate_surface_pixel_size( |
| &plane->aux_surface.isl); |
| |
| if (isl_aux_usage_has_ccs(att->aux_usage)) |
| pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size( |
| &plane->primary_surface.isl), |
| aux_scale); |
| } |
| } |
| |
| const struct anv_image_view *const z_view = gfx->depth_att.iview; |
| if (z_view) { |
| const struct anv_image *image = z_view->image; |
| assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT); |
| const unsigned p = anv_image_aspect_to_plane(image, |
| VK_IMAGE_ASPECT_DEPTH_BIT); |
| const struct anv_image_plane *plane = &image->planes[p]; |
| |
| pixel_size += intel_calculate_surface_pixel_size( |
| &plane->primary_surface.isl); |
| |
| if (isl_aux_usage_has_hiz(image->planes[p].aux_usage)) |
| pixel_size += intel_calculate_surface_pixel_size( |
| &plane->aux_surface.isl); |
| |
| if (isl_aux_usage_has_ccs(image->planes[p].aux_usage)) |
| pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size( |
| &plane->primary_surface.isl), |
| aux_scale); |
| } |
| |
   const struct anv_image_view *const s_view = gfx->stencil_att.iview;
| if (s_view && s_view != z_view) { |
| const struct anv_image *image = s_view->image; |
| assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT); |
| const unsigned p = anv_image_aspect_to_plane(image, |
| VK_IMAGE_ASPECT_STENCIL_BIT); |
| const struct anv_image_plane *plane = &image->planes[p]; |
| |
| pixel_size += intel_calculate_surface_pixel_size( |
| &plane->primary_surface.isl); |
| } |
| |
| if (!pixel_size) |
| return false; |
| |
| /* Compute a tile layout that allows reasonable utilization of the |
| * tile cache based on the per-pixel cache footprint estimated |
| * above. |
| */ |
| intel_calculate_tile_dimensions(device->info, l3_config, |
| 32, 32, fb_width, fb_height, |
| pixel_size, tile_width, tile_height); |
| |
| /* Perform TBIMR tile passes only if the framebuffer covers more |
| * than a single tile. |
| */ |
| return *tile_width < fb_width || *tile_height < fb_height; |
| } |
| #endif |
| |
| #define GET(field) hw_state->field |
| #define SET(bit, field, value) \ |
| do { \ |
| __typeof(hw_state->field) __v = value; \ |
| if (hw_state->field != __v) { \ |
| hw_state->field = __v; \ |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ |
| } \ |
| } while (0) |
| #define SET_STAGE(bit, field, value, stage) \ |
| do { \ |
| __typeof(hw_state->field) __v = value; \ |
| if (!anv_pipeline_has_stage(pipeline, \ |
| MESA_SHADER_##stage)) { \ |
| hw_state->field = __v; \ |
| break; \ |
| } \ |
| if (hw_state->field != __v) { \ |
| hw_state->field = __v; \ |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ |
| } \ |
| } while (0) |
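
/* Illustrative expansion: SET(RASTER, raster.CullMode, CULLMODE_BACK)
 * behaves like
 *
 *    if (hw_state->raster.CullMode != CULLMODE_BACK) {
 *       hw_state->raster.CullMode = CULLMODE_BACK;
 *       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
 *    }
 *
 * so a field only dirties its instruction when its value actually changes.
 */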
| #define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \ |
| switch (mode) { \ |
| case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \ |
| SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \ |
| break; \ |
| case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \ |
| SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2); \ |
| SET(bit, cmd.LineStripListProvokingVertexSelect, 1); \ |
| SET(bit, cmd.TriangleFanProvokingVertexSelect, 2); \ |
| break; \ |
| default: \ |
| unreachable("Invalid provoking vertex mode"); \ |
| } \ |
| |
| #define SETUP_PROVOKING_VERTEX_FSB(bit, cmd, mode) \ |
| switch (mode) { \ |
| case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \ |
| SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \ |
| SET(bit, cmd.TriangleStripOddProvokingVertexSelect, 0); \ |
| break; \ |
| case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \ |
| SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \ |
| SET(bit, cmd.TriangleFanProvokingVertexSelect, 0); \ |
| SET(bit, cmd.TriangleStripOddProvokingVertexSelect, 1); \ |
| break; \ |
| default: \ |
| unreachable("Invalid provoking vertex mode"); \ |
| } \ |
| |
| ALWAYS_INLINE static void |
| update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| if (!wm_prog_data) |
| return; |
| |
| /* If we have any dynamic bits here, we might need to update the value |
| * in the push constant for the shader. |
| */ |
| if (!brw_wm_prog_data_is_dynamic(wm_prog_data)) |
| return; |
| |
| enum intel_msaa_flags fs_msaa_flags = |
| intel_fs_msaa_flags((struct intel_fs_params) { |
| .shader_sample_shading = wm_prog_data->sample_shading, |
| .shader_min_sample_shading = pipeline->min_sample_shading, |
| .state_sample_shading = pipeline->sample_shading_enable, |
| .rasterization_samples = dyn->ms.rasterization_samples, |
| .coarse_pixel = !vk_fragment_shading_rate_is_disabled(&dyn->fsr), |
| .alpha_to_coverage = dyn->ms.alpha_to_coverage_enable, |
| .provoking_vertex_last = dyn->rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT, |
| .primitive_id_index = pipeline->primitive_id_index, |
| }); |
| |
| SET(FS_MSAA_FLAGS, fs_msaa_flags, fs_msaa_flags); |
| } |
| |
| ALWAYS_INLINE static void |
| update_ps(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_device *device, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| if (!wm_prog_data) { |
| #if GFX_VER < 20 |
| SET(PS, ps._8PixelDispatchEnable, false); |
| SET(PS, ps._16PixelDispatchEnable, false); |
| SET(PS, ps._32PixelDispatchEnable, false); |
| #else |
| SET(PS, ps.Kernel0Enable, false); |
| SET(PS, ps.Kernel1Enable, false); |
| #endif |
| return; |
| } |
| |
| const struct anv_shader_bin *fs_bin = |
| pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| struct GENX(3DSTATE_PS) ps = {}; |
| intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data, |
| MAX2(dyn->ms.rasterization_samples, 1), |
| hw_state->fs_msaa_flags); |
| |
| SET(PS, ps.KernelStartPointer0, |
| fs_bin->kernel.offset + |
| brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0)); |
| SET(PS, ps.KernelStartPointer1, |
| fs_bin->kernel.offset + |
| brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1)); |
| #if GFX_VER < 20 |
| SET(PS, ps.KernelStartPointer2, |
| fs_bin->kernel.offset + |
| brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2)); |
| #endif |
| |
| SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0, |
| brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0)); |
| SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1, |
| brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1)); |
| #if GFX_VER < 20 |
| SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2, |
| brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2)); |
| #endif |
| |
| #if GFX_VER < 20 |
| SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable); |
| SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable); |
| SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable); |
| #else |
| SET(PS, ps.Kernel0Enable, ps.Kernel0Enable); |
| SET(PS, ps.Kernel1Enable, ps.Kernel1Enable); |
| SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth); |
| SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth); |
| SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy); |
| SET(PS, ps.Kernel0MaximumPolysperThread, ps.Kernel0MaximumPolysperThread); |
| #endif |
| |
| SET(PS, ps.PositionXYOffsetSelect, |
| !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE : |
| brw_wm_prog_data_is_persample(wm_prog_data, |
| hw_state->fs_msaa_flags) ? |
| POSOFFSET_SAMPLE : POSOFFSET_CENTROID); |
| } |
| |
| ALWAYS_INLINE static void |
| update_ps_extra_wm(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| if (!wm_prog_data) |
| return; |
| |
| SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample, |
| brw_wm_prog_data_is_persample(wm_prog_data, |
| hw_state->fs_msaa_flags)); |
| #if GFX_VER >= 11 |
| const bool uses_coarse_pixel = |
| brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags); |
| SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel); |
| #endif |
| #if GFX_VERx10 >= 125 |
| /* TODO: We should only require this when the last geometry shader uses a |
| * fragment shading rate that is not constant. |
| */ |
| SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, uses_coarse_pixel); |
| #endif |
| |
| SET(WM, wm.BarycentricInterpolationMode, |
| wm_prog_data_barycentric_modes(wm_prog_data, hw_state->fs_msaa_flags)); |
| } |
| |
| ALWAYS_INLINE static void |
| update_ps_extra_has_uav(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| /* Force fragment shader execution if occlusion queries are active to |
| * ensure PS_DEPTH_COUNT is correct. Otherwise a fragment shader with |
    * discard and no render target setup could increment PS_DEPTH_COUNT if
| * the HW internally decides to not run the shader because it has already |
| * established that depth-test is passing. |
| */ |
| SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV, |
| wm_prog_data && (wm_prog_data->has_side_effects || |
| gfx->n_occlusion_queries > 0), |
| FRAGMENT); |
| } |
| |
| ALWAYS_INLINE static void |
| update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel, |
| wm_prog_data && |
| (has_ds_feedback_loop(&fs_bin->bind_map, dyn) || |
| wm_prog_data->uses_kill), |
| FRAGMENT); |
| } |
| |
| #if GFX_VERx10 >= 125 |
| ALWAYS_INLINE static void |
| update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable); |
| } |
| #endif |
| |
| ALWAYS_INLINE static void |
| update_streamout(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable); |
| SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream); |
| |
| #if INTEL_NEEDS_WA_18022508906 |
| /* Wa_18022508906 : |
| * |
| * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage: |
| * |
| * SOL_INT::Render_Enable = |
| * (3DSTATE_STREAMOUT::Force_Rending == Force_On) || |
| * ( |
| * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) && |
| * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) && |
| * !3DSTATE_STREAMOUT::API_Render_Disable && |
| * ( |
| * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable || |
| * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable || |
| * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable || |
| * 3DSTATE_PS_EXTRA::PS_Valid || |
| * 3DSTATE_WM::Legacy Depth_Buffer_Clear || |
| * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable || |
| * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable |
| * ) |
| * ) |
| * |
| * If SOL_INT::Render_Enable is false, the SO stage will not forward any |
| * topologies down the pipeline. Which is not what we want for occlusion |
| * queries. |
| * |
| * Here we force rendering to get SOL_INT::Render_Enable when occlusion |
| * queries are active. |
| */ |
| SET(STREAMOUT, so.ForceRendering, |
| (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ? |
| Force_on : 0); |
| #endif |
| } |
| |
| ALWAYS_INLINE static void |
| update_provoking_vertex(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| #if GFX_VERx10 >= 200 |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| |
| /* In order to respect the table indicated by Vulkan 1.4.312, |
| * 28.9. Barycentric Interpolation, we need to program the provoking |
| * vertex state differently depending on whether we need to set |
| * vertex_attributes_bypass or not. |
| * At this point we only deal with full pipelines, so if we don't have |
| * a wm_prog_data, there is no fragment shader and none of this matters. |
| */ |
| if (wm_prog_data && wm_prog_data->vertex_attributes_bypass) { |
| SETUP_PROVOKING_VERTEX_FSB(SF, sf, dyn->rs.provoking_vertex); |
| SETUP_PROVOKING_VERTEX_FSB(CLIP, clip, dyn->rs.provoking_vertex); |
| } else { |
| /* If we are not setting vertex attributes bypass, we can just use |
| * the same macro as older generations. There's one bit missing from |
| * it, but that one is only used for the case above and ignored |
| * otherwise, so we can pretend it doesn't exist here. |
| */ |
| SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex); |
| SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex); |
| } |
| #else |
| SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex); |
| SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex); |
| #endif |
| |
| switch (dyn->rs.provoking_vertex) { |
| case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: |
| SET(STREAMOUT, so.ReorderMode, LEADING); |
| SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY); |
| break; |
| |
| case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: |
| SET(STREAMOUT, so.ReorderMode, TRAILING); |
| SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY); |
| break; |
| |
| default: |
| unreachable("Invalid provoking vertex mode"); |
| } |
| } |
| |
| ALWAYS_INLINE static void |
| update_topology(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| uint32_t topology = |
| anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? |
| _3DPRIM_PATCHLIST(dyn->ts.patch_control_points) : |
| vk_to_intel_primitive_type[dyn->ia.primitive_topology]; |
| |
| SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology); |
| } |
| |
| #if GFX_VER >= 11 |
| ALWAYS_INLINE static void |
| update_cps(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_device *device, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| #if GFX_VER >= 30 |
| SET(COARSE_PIXEL, coarse_pixel.CPSizeX, |
| get_cps_size(dyn->fsr.fragment_size.width)); |
| SET(COARSE_PIXEL, coarse_pixel.CPSizeY, |
| get_cps_size(dyn->fsr.fragment_size.height)); |
| SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner0Opcode, |
| vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[0]]); |
| SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner1Opcode, |
| vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[1]]); |
| #elif GFX_VER >= 12 |
| SET(CPS, cps.CoarsePixelShadingStateArrayPointer, |
| get_cps_state_offset(device, &dyn->fsr)); |
| #else |
| STATIC_ASSERT(GFX_VER == 11); |
| SET(CPS, cps.CoarsePixelShadingMode, CPS_MODE_CONSTANT); |
| SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width); |
| SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height); |
| #endif |
| } |
| #endif |
| |
| ALWAYS_INLINE static void |
| update_te(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
| const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); |
| |
| if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { |
| if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { |
| SET(TE, te.OutputTopology, tes_prog_data->output_topology); |
| } else { |
| /* When the origin is upper-left, we have to flip the winding order */ |
| if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) { |
| SET(TE, te.OutputTopology, OUTPUT_TRI_CW); |
| } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) { |
| SET(TE, te.OutputTopology, OUTPUT_TRI_CCW); |
| } else { |
| SET(TE, te.OutputTopology, tes_prog_data->output_topology); |
| } |
| } |
| } else { |
| SET(TE, te.OutputTopology, OUTPUT_POINT); |
| } |
| } |
| |
| ALWAYS_INLINE static void |
| update_line_width(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(SF, sf.LineWidth, dyn->rs.line.width); |
| } |
| |
| ALWAYS_INLINE static void |
| update_sf_global_depth_bias(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| /** |
| * From the Vulkan Spec: |
| * |
| * "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth bias |
| * representation is a factor of constant r equal to 1." |
| * |
| * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset: |
| * |
| * "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer): |
| * |
| * Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope |
| * |
| * Where r is the minimum representable value > 0 in the depth buffer |
| * format, converted to float32 (note: If state bit Legacy Global Depth |
| * Bias Enable is set, the r term will be forced to 1.0)" |
| * |
| * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable |
| * LegacyGlobalDepthBiasEnable. |
| */ |
| SET(SF, sf.LegacyGlobalDepthBiasEnable, |
| dyn->rs.depth_bias.representation == |
| VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT); |
| } |
| |
| ALWAYS_INLINE static void |
| update_clip_api_mode(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(CLIP, clip.APIMode, |
| dyn->vp.depth_clip_negative_one_to_one ? |
| APIMODE_OGL : APIMODE_D3D); |
| } |
| |
| ALWAYS_INLINE static void |
| update_clip_max_viewport(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| /* From the Vulkan 1.0.45 spec: |
| * |
| * "If the last active vertex processing stage shader entry point's |
| * interface does not include a variable decorated with ViewportIndex, |
| * then the first viewport is used." |
| * |
| * This could mean that we might need to set the MaximumVPIndex based on |
| * the pipeline's last stage, but if the last shader doesn't write the |
| * viewport index and the VUE header is used, the compiler will force the |
| * value to 0 (which is what the spec requires above). Otherwise it seems |
| * like the HW should be pulling 0 if the VUE header is not present. |
| * |
| * Avoiding a check on the pipeline seems to prevent additional emissions |
| * of 3DSTATE_CLIP which appear to impact performance on Assassin's Creed |
    * Valhalla.
| */ |
| SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ? |
| dyn->vp.viewport_count - 1 : 0); |
| } |
| |
| ALWAYS_INLINE static void |
| update_clip_raster(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_graphics_pipeline *pipeline) |
| { |
   /* Take dynamic primitive topology into account with
| * 3DSTATE_RASTER::APIMode |
| * 3DSTATE_RASTER::DXMultisampleRasterizationEnable |
| * 3DSTATE_RASTER::AntialiasingEnable |
| */ |
| uint32_t api_mode = 0; |
| bool msaa_raster_enable = false; |
| |
| const VkLineRasterizationModeKHR line_mode = |
| anv_line_rasterization_mode(dyn->rs.line.mode, |
| dyn->ms.rasterization_samples); |
| |
| const VkPolygonMode dynamic_raster_mode = |
| anv_raster_polygon_mode(pipeline, |
| dyn->rs.polygon_mode, |
| dyn->ia.primitive_topology); |
| |
| anv_rasterization_mode(dynamic_raster_mode, |
| line_mode, dyn->rs.line.width, |
| &api_mode, &msaa_raster_enable); |
| |
   /* From the Broadwell PRM, Volume 2, documentation for 3DSTATE_RASTER,
| * "Antialiasing Enable": |
| * |
| * "This field must be disabled if any of the render targets have integer |
| * (UINT or SINT) surface format." |
| * |
| * Additionally internal documentation for Gfx12+ states: |
| * |
| * "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR |
| * FORCED_SAMPLE_COUNT > 1." |
| */ |
| const bool aa_enable = |
| anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) && |
| !gfx->has_uint_rt && |
| !(GFX_VER >= 12 && gfx->samples > 1); |
| |
| const bool depth_clip_enable = |
| vk_rasterization_state_depth_clip_enable(&dyn->rs); |
| |
| const bool xy_clip_test_enable = |
| (dynamic_raster_mode == VK_POLYGON_MODE_FILL); |
| |
| SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable); |
| |
| SET(RASTER, raster.APIMode, api_mode); |
| SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable); |
| SET(RASTER, raster.AntialiasingEnable, aa_enable); |
| SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]); |
| SET(RASTER, raster.FrontWinding, vk_to_intel_front_face[dyn->rs.front_face]); |
| SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable); |
| SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable); |
| SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable); |
| SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant_factor); |
| SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope_factor); |
| SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp); |
| SET(RASTER, raster.FrontFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]); |
| SET(RASTER, raster.BackFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]); |
| SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable); |
| SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable); |
| SET(RASTER, raster.ConservativeRasterizationEnable, |
| dyn->rs.conservative_mode != |
| VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT); |
| |
| #if GFX_VERx10 >= 200 |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| SET(RASTER, raster.LegacyBaryAssignmentDisable, |
| wm_prog_data && wm_prog_data->vertex_attributes_bypass); |
| #endif |
| } |
| |
| ALWAYS_INLINE static void |
| update_multisample(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(MULTISAMPLE, ms.NumberofMultisamples, |
| __builtin_ffs(MAX2(dyn->ms.rasterization_samples, 1)) - 1); |
| } |
| |
| ALWAYS_INLINE static void |
| update_sample_mask(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| /* From the Vulkan 1.0 spec: |
| * If pSampleMask is NULL, it is treated as if the mask has all bits |
| * enabled, i.e. no coverage is removed from fragments. |
| * |
| * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. |
| */ |
| SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff); |
| } |
| |
| ALWAYS_INLINE static void |
| update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_device *device) |
| { |
| VkImageAspectFlags ds_aspects = 0; |
| if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED) |
| ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; |
| if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED) |
| ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| |
| struct vk_depth_stencil_state opt_ds = dyn->ds; |
| vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); |
| |
| SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true); |
| |
| SET(WM_DEPTH_STENCIL, ds.StencilTestMask, |
| opt_ds.stencil.front.compare_mask & 0xff); |
| SET(WM_DEPTH_STENCIL, ds.StencilWriteMask, |
| opt_ds.stencil.front.write_mask & 0xff); |
| |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff); |
| |
| SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue, |
| opt_ds.stencil.front.reference & 0xff); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue, |
| opt_ds.stencil.back.reference & 0xff); |
| |
| SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable); |
| SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable); |
| SET(WM_DEPTH_STENCIL, ds.DepthTestFunction, |
| vk_to_intel_compare_op[opt_ds.depth.compare_op]); |
| SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable); |
| SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable, |
| opt_ds.stencil.write_enable); |
| SET(WM_DEPTH_STENCIL, ds.StencilFailOp, |
| vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]); |
| SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp, |
| vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]); |
| SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp, |
| vk_to_intel_stencil_op[ |
| opt_ds.stencil.front.op.depth_fail]); |
| SET(WM_DEPTH_STENCIL, ds.StencilTestFunction, |
| vk_to_intel_compare_op[ |
| opt_ds.stencil.front.op.compare]); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp, |
| vk_to_intel_stencil_op[ |
| opt_ds.stencil.back.op.fail]); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp, |
| vk_to_intel_stencil_op[ |
| opt_ds.stencil.back.op.pass]); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp, |
| vk_to_intel_stencil_op[ |
| opt_ds.stencil.back.op.depth_fail]); |
| SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction, |
| vk_to_intel_compare_op[ |
| opt_ds.stencil.back.op.compare]); |
| |
| #if GFX_VER == 9 |
| const bool pma = want_stencil_pma_fix(dyn, gfx, &opt_ds); |
| SET(PMA_FIX, pma_fix, pma); |
| #endif |
| |
| #if INTEL_WA_18019816803_GFX_VER |
| if (intel_needs_workaround(device->info, 18019816803)) { |
| bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable; |
| SET(WA_18019816803, ds_write_state, ds_write_state); |
| } |
| #endif |
| } |
| |
| ALWAYS_INLINE static void |
| update_depth_bounds(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable); |
| /* Only look at updating the bounds if testing is enabled */ |
| if (dyn->ds.depth.bounds_test.enable) { |
| SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min); |
| SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max); |
| } |
| } |
| |
| ALWAYS_INLINE static void |
| update_line_stipple(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern); |
| SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount, |
| 1.0f / MAX2(1, dyn->rs.line.stipple.factor)); |
| SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor); |
| |
| SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable); |
| } |
| |
| ALWAYS_INLINE static void |
| update_vf_restart(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx) |
| { |
| SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable); |
| SET(VF, vf.CutIndex, vk_index_to_restart(gfx->index_type)); |
| } |
| |
| ALWAYS_INLINE static void |
| update_blend_state(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| struct anv_cmd_graphics_state *gfx, |
| const struct anv_device *device, |
| bool has_fs_stage, |
| bool has_fs_dual_src) |
| { |
| const struct anv_instance *instance = device->physical->instance; |
| const uint8_t color_writes = dyn->cb.color_write_enables; |
| bool has_writeable_rt = |
| has_fs_stage && |
| !anv_gfx_all_color_write_masked(gfx, dyn); |
| |
| SET(BLEND_STATE, blend.AlphaToCoverageEnable, |
| dyn->ms.alpha_to_coverage_enable); |
| SET(BLEND_STATE, blend.AlphaToOneEnable, |
| dyn->ms.alpha_to_one_enable); |
| SET(BLEND_STATE, blend.ColorDitherEnable, |
| gfx->rendering_flags & |
| VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT); |
| |
| bool independent_alpha_blend = false; |
| /* Wa_14018912822, check if we set these during RT setup. */ |
| bool color_blend_zero = false; |
| bool alpha_blend_zero = false; |
| uint32_t rt_0 = MESA_VK_ATTACHMENT_UNUSED; |
| for (uint32_t rt = 0; rt < MAX_RTS; rt++) { |
| if (gfx->color_output_mapping[rt] >= gfx->color_att_count) |
| continue; |
| |
| uint32_t att = gfx->color_output_mapping[rt]; |
      if (att == 0)
         rt_0 = rt;
| |
      /* Disable all channels when color writes are disabled for this
       * attachment.
       */
      bool write_disabled = (color_writes & BITFIELD_BIT(att)) == 0;
| |
| SET(BLEND_STATE, blend.rts[rt].WriteDisableAlpha, |
| write_disabled || |
| (dyn->cb.attachments[att].write_mask & |
| VK_COLOR_COMPONENT_A_BIT) == 0); |
| SET(BLEND_STATE, blend.rts[rt].WriteDisableRed, |
| write_disabled || |
| (dyn->cb.attachments[att].write_mask & |
| VK_COLOR_COMPONENT_R_BIT) == 0); |
| SET(BLEND_STATE, blend.rts[rt].WriteDisableGreen, |
| write_disabled || |
| (dyn->cb.attachments[att].write_mask & |
| VK_COLOR_COMPONENT_G_BIT) == 0); |
| SET(BLEND_STATE, blend.rts[rt].WriteDisableBlue, |
| write_disabled || |
| (dyn->cb.attachments[att].write_mask & |
| VK_COLOR_COMPONENT_B_BIT) == 0); |
| /* Vulkan specification 1.2.168, VkLogicOp: |
| * |
| * "Logical operations are controlled by the logicOpEnable and logicOp |
| * members of VkPipelineColorBlendStateCreateInfo. If logicOpEnable is |
| * VK_TRUE, then a logical operation selected by logicOp is applied |
| * between each color attachment and the fragment’s corresponding |
| * output value, and blending of all attachments is treated as if it |
| * were disabled." |
| * |
| * From the Broadwell PRM Volume 2d: Command Reference: Structures: |
| * BLEND_STATE_ENTRY: |
| * |
| * "Enabling LogicOp and Color Buffer Blending at the same time is |
| * UNDEFINED" |
| * |
| * The Vulkan spec also says: |
| * "Logical operations are not applied to floating-point or sRGB format |
| * color attachments." |
| * and |
| * "Any attachments using color formats for which logical operations |
| * are not supported simply pass through the color values unmodified." |
| */ |
| bool ignores_logic_op = |
| vk_format_is_float(gfx->color_att[att].vk_format) || |
| vk_format_is_srgb(gfx->color_att[att].vk_format); |
| SET(BLEND_STATE, blend.rts[rt].LogicOpFunction, |
| vk_to_intel_logic_op[dyn->cb.logic_op]); |
| SET(BLEND_STATE, blend.rts[rt].LogicOpEnable, |
| dyn->cb.logic_op_enable && !ignores_logic_op); |
| |
| SET(BLEND_STATE, blend.rts[rt].ColorClampRange, COLORCLAMP_RTFORMAT); |
| SET(BLEND_STATE, blend.rts[rt].PreBlendColorClampEnable, true); |
| SET(BLEND_STATE, blend.rts[rt].PostBlendColorClampEnable, true); |
| |
| #if GFX_VER >= 30 |
| SET(BLEND_STATE, blend.rts[rt].SimpleFloatBlendEnable, true); |
| #endif |
| |
| /* Setup blend equation. */ |
| SET(BLEND_STATE, blend.rts[rt].ColorBlendFunction, |
| vk_to_intel_blend_op[ |
| dyn->cb.attachments[att].color_blend_op]); |
| SET(BLEND_STATE, blend.rts[rt].AlphaBlendFunction, |
| vk_to_intel_blend_op[ |
| dyn->cb.attachments[att].alpha_blend_op]); |
| |
| if (dyn->cb.attachments[att].src_color_blend_factor != |
| dyn->cb.attachments[att].src_alpha_blend_factor || |
| dyn->cb.attachments[att].dst_color_blend_factor != |
| dyn->cb.attachments[att].dst_alpha_blend_factor || |
| dyn->cb.attachments[att].color_blend_op != |
| dyn->cb.attachments[att].alpha_blend_op) |
| independent_alpha_blend = true; |
| |
| /* The Dual Source Blending documentation says: |
| * |
| * "If SRC1 is included in a src/dst blend factor and a DualSource RT |
| * Write message is not used, results are UNDEFINED. (This reflects the |
| * same restriction in DX APIs, where undefined results are produced if |
| * “o1” is not written by a PS – there are no default values defined)." |
| * |
    * There is no way to gracefully fix this undefined situation, so we just
| * disable the blending to prevent possible issues. |
| */ |
| if (has_fs_stage && !has_fs_dual_src && |
| anv_is_dual_src_blend_equation(&dyn->cb.attachments[att])) { |
| SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, false); |
| } else { |
| SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, |
| !dyn->cb.logic_op_enable && |
| dyn->cb.attachments[att].blend_enable); |
| } |
| |
| /* Our hardware applies the blend factor prior to the blend function |
| * regardless of what function is used. Technically, this means the |
| * hardware can do MORE than GL or Vulkan specify. However, it also |
| * means that, for MIN and MAX, we have to stomp the blend factor to ONE |
| * to make it a no-op. |
| */ |
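| /* Worked example: with VK_BLEND_OP_MIN the hardware would compute |
| * min(src * SourceBlendFactor, dst * DestinationBlendFactor) while |
| * Vulkan specifies min(src, dst), so both factors are forced to |
| * BLENDFACTOR_ONE below to make the factor multiplication a no-op. |
| */ |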
| uint32_t SourceBlendFactor; |
| uint32_t DestinationBlendFactor; |
| uint32_t SourceAlphaBlendFactor; |
| uint32_t DestinationAlphaBlendFactor; |
| if (dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MIN || |
| dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MAX) { |
| SourceBlendFactor = BLENDFACTOR_ONE; |
| DestinationBlendFactor = BLENDFACTOR_ONE; |
| } else { |
| SourceBlendFactor = vk_to_intel_blend[ |
| dyn->cb.attachments[att].src_color_blend_factor]; |
| DestinationBlendFactor = vk_to_intel_blend[ |
| dyn->cb.attachments[att].dst_color_blend_factor]; |
| } |
| |
| if (dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MIN || |
| dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MAX) { |
| SourceAlphaBlendFactor = BLENDFACTOR_ONE; |
| DestinationAlphaBlendFactor = BLENDFACTOR_ONE; |
| } else { |
| SourceAlphaBlendFactor = vk_to_intel_blend[ |
| dyn->cb.attachments[att].src_alpha_blend_factor]; |
| DestinationAlphaBlendFactor = vk_to_intel_blend[ |
| dyn->cb.attachments[att].dst_alpha_blend_factor]; |
| } |
| |
| /* Replace any Src1 blend factor with ONE if dual source blending is |
| * not enabled. |
| */ |
| if (has_fs_stage && !has_fs_dual_src) { |
| if (is_src1_blend_factor(SourceBlendFactor)) |
| SourceBlendFactor = BLENDFACTOR_ONE; |
| if (is_src1_blend_factor(DestinationBlendFactor)) |
| DestinationBlendFactor = BLENDFACTOR_ONE; |
| } |
| |
| if (instance->intel_enable_wa_14018912822 && |
| intel_needs_workaround(device->info, 14018912822) && |
| dyn->ms.rasterization_samples > 1) { |
| if (DestinationBlendFactor == BLENDFACTOR_ZERO) { |
| DestinationBlendFactor = BLENDFACTOR_CONST_COLOR; |
| color_blend_zero = true; |
| } |
| if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) { |
| DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA; |
| alpha_blend_zero = true; |
| } |
| } |
| |
| SET(BLEND_STATE, blend.rts[rt].SourceBlendFactor, SourceBlendFactor); |
| SET(BLEND_STATE, blend.rts[rt].DestinationBlendFactor, DestinationBlendFactor); |
| SET(BLEND_STATE, blend.rts[rt].SourceAlphaBlendFactor, SourceAlphaBlendFactor); |
| SET(BLEND_STATE, blend.rts[rt].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor); |
| } |
| gfx->color_blend_zero = color_blend_zero; |
| gfx->alpha_blend_zero = alpha_blend_zero; |
| |
| SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend); |
| |
| if (rt_0 == MESA_VK_ATTACHMENT_UNUSED) |
| rt_0 = 0; |
| |
| /* Set up 3DSTATE_PS_BLEND to be consistent with the BLEND_STATE_ENTRY |
| * of the first used render target (rt_0). |
| */ |
| SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt); |
| SET(PS_BLEND, ps_blend.ColorBufferBlendEnable, |
| GET(blend.rts[rt_0].ColorBufferBlendEnable)); |
| SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor, |
| GET(blend.rts[rt_0].SourceAlphaBlendFactor)); |
| SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor, |
| gfx->alpha_blend_zero ? |
| BLENDFACTOR_CONST_ALPHA : |
| GET(blend.rts[rt_0].DestinationAlphaBlendFactor)); |
| SET(PS_BLEND, ps_blend.SourceBlendFactor, |
| GET(blend.rts[rt_0].SourceBlendFactor)); |
| SET(PS_BLEND, ps_blend.DestinationBlendFactor, |
| gfx->color_blend_zero ? |
| BLENDFACTOR_CONST_COLOR : |
| GET(blend.rts[rt_0].DestinationBlendFactor)); |
| SET(PS_BLEND, ps_blend.AlphaTestEnable, false); |
| SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable, |
| GET(blend.IndependentAlphaBlendEnable)); |
| SET(PS_BLEND, ps_blend.AlphaToCoverageEnable, |
| dyn->ms.alpha_to_coverage_enable); |
| } |
| |
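| /* Note: when Wa_14018912822 substituted CONST_COLOR/CONST_ALPHA for a |
| * BLENDFACTOR_ZERO above, the corresponding blend constants are forced |
| * to 0.0 here so the substituted factors still behave like ZERO. |
| */ |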
| ALWAYS_INLINE static void |
| update_blend_constants(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx) |
| { |
| SET(CC_STATE, cc.BlendConstantColorRed, |
| gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]); |
| SET(CC_STATE, cc.BlendConstantColorGreen, |
| gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]); |
| SET(CC_STATE, cc.BlendConstantColorBlue, |
| gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]); |
| SET(CC_STATE, cc.BlendConstantColorAlpha, |
| gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]); |
| } |
| |
| ALWAYS_INLINE static void |
| update_viewports(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct anv_device *device) |
| { |
| const struct anv_instance *instance = device->physical->instance; |
| const VkViewport *viewports = dyn->vp.viewports; |
| |
| const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f; |
| |
| for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) { |
| const VkViewport *vp = &viewports[i]; |
| |
| /* The gfx7 state struct has just the matrix and guardband fields, the |
| * gfx8 struct adds the min/max viewport fields. |
| */ |
| struct GENX(SF_CLIP_VIEWPORT) sfv = { |
| .ViewportMatrixElementm00 = vp->width / 2, |
| .ViewportMatrixElementm11 = vp->height / 2, |
| .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale, |
| .ViewportMatrixElementm30 = vp->x + vp->width / 2, |
| .ViewportMatrixElementm31 = vp->y + vp->height / 2, |
| .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ? |
| (vp->minDepth + vp->maxDepth) * scale : vp->minDepth, |
| .XMinClipGuardband = -1.0f, |
| .XMaxClipGuardband = 1.0f, |
| .YMinClipGuardband = -1.0f, |
| .YMaxClipGuardband = 1.0f, |
| .XMinViewPort = vp->x, |
| .XMaxViewPort = vp->x + vp->width - 1, |
| .YMinViewPort = MIN2(vp->y, vp->y + vp->height), |
| .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1, |
| }; |
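| /* Illustration of the transform encoded above: for an NDC coordinate |
| * (x, y, z), the rasterizer computes |
| * x_screen = m00 * x + m30 |
| * y_screen = m11 * y + m31 |
| * z_screen = m22 * z + m32 |
| * mapping [-1, 1] in x/y onto the viewport rectangle and the depth |
| * range ([0, 1] by default, [-1, 1] with negative_one_to_one) onto |
| * [minDepth, maxDepth]. |
| */ |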
| |
| /* Fix depth test misrenderings by lowering translated depth range */ |
| if (instance->lower_depth_range_rate != 1.0f) |
| sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; |
| |
| const uint32_t fb_size_max = 1 << 14; |
| uint32_t x_min = 0, x_max = fb_size_max; |
| uint32_t y_min = 0, y_max = fb_size_max; |
| |
| /* If we have a valid renderArea, include that */ |
| if (gfx->render_area.extent.width > 0 && |
| gfx->render_area.extent.height > 0) { |
| x_min = MAX2(x_min, gfx->render_area.offset.x); |
| x_max = MIN2(x_max, gfx->render_area.offset.x + |
| gfx->render_area.extent.width); |
| y_min = MAX2(y_min, gfx->render_area.offset.y); |
| y_max = MIN2(y_max, gfx->render_area.offset.y + |
| gfx->render_area.extent.height); |
| } |
| |
| /* The client is required to have enough scissors for whatever it |
| * sets as ViewportIndex, but it's possible that it has more viewports |
| * set from a previous command. Also, from the Vulkan 1.3.207 spec: |
| * |
| * "The application must ensure (using scissor if necessary) that |
| * all rendering is contained within the render area." |
| * |
| * If the client doesn't set a scissor, that basically means it |
| * guarantees everything is in-bounds already. If we end up using a |
| * guardband of [-1, 1] in that case, there shouldn't be much loss. |
| * It's theoretically possible that they could do all their clipping |
| * with clip planes but that'd be a bit odd. |
| */ |
| if (i < dyn->vp.scissor_count) { |
| const VkRect2D *scissor = &dyn->vp.scissors[i]; |
| x_min = MAX2(x_min, scissor->offset.x); |
| x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width); |
| y_min = MAX2(y_min, scissor->offset.y); |
| y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height); |
| } |
| |
| /* Only bother calculating the guardband if our known render area is |
| * less than the maximum size. Otherwise, it will calculate [-1, 1] |
| * anyway but possibly with precision loss. |
| */ |
| if (x_min > 0 || x_max < fb_size_max || |
| y_min > 0 || y_max < fb_size_max) { |
| intel_calculate_guardband_size(x_min, x_max, y_min, y_max, |
| sfv.ViewportMatrixElementm00, |
| sfv.ViewportMatrixElementm11, |
| sfv.ViewportMatrixElementm30, |
| sfv.ViewportMatrixElementm31, |
| &sfv.XMinClipGuardband, |
| &sfv.XMaxClipGuardband, |
| &sfv.YMinClipGuardband, |
| &sfv.YMaxClipGuardband); |
| } |
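| /* intel_calculate_guardband_size() roughly maps the screen-space |
| * bounds back through the inverse of the viewport transform above, |
| * yielding an NDC guardband that only discards geometry the |
| * scissor/render area would reject anyway. |
| */ |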
| |
| #define SET_VP(bit, state, field) \ |
| do { \ |
| if (hw_state->state.field != sfv.field) { \ |
| hw_state->state.field = sfv.field; \ |
| BITSET_SET(hw_state->dirty, \ |
| ANV_GFX_STATE_##bit); \ |
| } \ |
| } while (0) |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort); |
| SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort); |
| #undef SET_VP |
| |
| const bool depth_range_unrestricted = |
| device->vk.enabled_extensions.EXT_depth_range_unrestricted; |
| |
| float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0f; |
| float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0f; |
| |
| float min_depth = dyn->rs.depth_clamp_enable ? |
| MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit; |
| float max_depth = dyn->rs.depth_clamp_enable ? |
| MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit; |
| |
| if (dyn->rs.depth_clamp_enable && |
| dyn->vp.depth_clamp_mode == VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT) { |
| min_depth = dyn->vp.depth_clamp_range.minDepthClamp; |
| max_depth = dyn->vp.depth_clamp_range.maxDepthClamp; |
| } |
| |
| SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth); |
| SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth); |
| } |
| |
| /* If the HW state is already considered dirty or the previously |
| * programmed viewport count is smaller than what we need, update the |
| * viewport count and ensure the HW state is dirty. Otherwise, if the |
| * number of viewports programmed previously was larger than what we |
| * need now, there is no need to reemit; we can keep the old programmed |
| * values. |
| */ |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || |
| hw_state->vp_sf_clip.count < dyn->vp.viewport_count) { |
| hw_state->vp_sf_clip.count = dyn->vp.viewport_count; |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); |
| } |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || |
| hw_state->vp_cc.count < dyn->vp.viewport_count) { |
| hw_state->vp_cc.count = dyn->vp.viewport_count; |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC); |
| } |
| } |
| |
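| /* Compute the final scissor rectangles as the intersection of the API |
| * scissor, the viewport extent and, for primary command buffers, the |
| * render area. |
| */ |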
| ALWAYS_INLINE static void |
| update_scissors(struct anv_gfx_dynamic_state *hw_state, |
| const struct vk_dynamic_graphics_state *dyn, |
| const struct anv_cmd_graphics_state *gfx, |
| VkCommandBufferLevel cmd_buffer_level) |
| { |
| const VkRect2D *scissors = dyn->vp.scissors; |
| const VkViewport *viewports = dyn->vp.viewports; |
| |
| for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) { |
| const VkRect2D *s = &scissors[i]; |
| const VkViewport *vp = &viewports[i]; |
| |
| const int max = 0xffff; |
| |
| uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height)); |
| uint32_t x_min = MAX2(s->offset.x, vp->x); |
| int64_t y_max = MIN2(s->offset.y + s->extent.height - 1, |
| MAX2(vp->y, vp->y + vp->height) - 1); |
| int64_t x_max = MIN2(s->offset.x + s->extent.width - 1, |
| vp->x + vp->width - 1); |
| |
| y_max = CLAMP(y_max, 0, INT16_MAX >> 1); |
| x_max = CLAMP(x_max, 0, INT16_MAX >> 1); |
| |
| /* Do this math using int64_t so overflow gets clamped correctly. */ |
| if (cmd_buffer_level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { |
| y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max); |
| x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max); |
| y_max = CLAMP((uint64_t) y_max, 0, |
| gfx->render_area.offset.y + |
| gfx->render_area.extent.height - 1); |
| x_max = CLAMP((uint64_t) x_max, 0, |
| gfx->render_area.offset.x + |
| gfx->render_area.extent.width - 1); |
| } |
| |
| if (s->extent.width <= 0 || s->extent.height <= 0) { |
| /* Since xmax and ymax are inclusive, we have to have xmax < xmin or |
| * ymax < ymin for empty clips. In case the clip x, y, width and |
| * height are all 0, the clamps below produce 0 for xmin, ymin, xmax, |
| * ymax, which isn't what we want. Just special case empty clips and |
| * produce a canonical empty clip. |
| */ |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0); |
| } else { |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max); |
| SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max); |
| } |
| } |
| |
| /* If the HW state is already considered dirty or the previously |
| * programmed scissor count is smaller than what we need, update the |
| * scissor count and ensure the HW state is dirty. Otherwise, if the |
| * number of scissors programmed previously was larger than what we |
| * need now, there is no need to reemit; we can keep the old programmed |
| * values. |
| */ |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) || |
| hw_state->scissor.count < dyn->vp.scissor_count) { |
| hw_state->scissor.count = dyn->vp.scissor_count; |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR); |
| } |
| } |
| |
| #if GFX_VERx10 == 125 |
| ALWAYS_INLINE static void |
| update_tbimr_info(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_device *device, |
| const struct anv_cmd_graphics_state *gfx, |
| const struct intel_l3_config *l3_config) |
| { |
| unsigned fb_width, fb_height, tile_width, tile_height; |
| |
| if (device->physical->instance->enable_tbimr && |
| calculate_render_area(gfx, &fb_width, &fb_height) && |
| calculate_tile_dimensions(device, gfx, l3_config, |
| fb_width, fb_height, |
| &tile_width, &tile_height)) { |
| /* Use a batch size of 128 polygons per slice as recommended by |
| * BSpec 68436 "TBIMR Programming". |
| */ |
| const unsigned num_slices = device->info->num_slices; |
| const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256; |
| |
| SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height); |
| SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width); |
| SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount, |
| DIV_ROUND_UP(fb_height, tile_height)); |
| SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount, |
| DIV_ROUND_UP(fb_width, tile_width)); |
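| /* The batch size field is programmed as log2(batch_size) - 5, e.g. a |
| * batch_size of 256 is encoded as 3. |
| */ |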
| SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize, |
| util_logbase2(batch_size) - 5); |
| SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true); |
| SET(TBIMR_TILE_PASS_INFO, use_tbimr, true); |
| } else { |
| hw_state->use_tbimr = false; |
| } |
| } |
| #endif |
| |
| /** |
| * This function takes the vulkan runtime values & dirty states and updates |
| * the values in anv_gfx_dynamic_state, flagging HW instructions for |
| * reemission if the values are changing. |
| * |
| * Nothing is emitted in the batch buffer. |
| */ |
| static void |
| cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, |
| const struct anv_device *device, |
| const struct vk_dynamic_graphics_state *dyn, |
| struct anv_cmd_graphics_state *gfx, |
| const struct anv_graphics_pipeline *pipeline, |
| VkCommandBufferLevel cmd_buffer_level) |
| { |
| UNUSED bool fs_msaa_changed = false; |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) |
| update_fs_msaa_flags(hw_state, dyn, pipeline); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { |
| update_ps(hw_state, device, dyn, pipeline); |
| update_ps_extra_wm(hw_state, pipeline); |
| } |
| |
| if (gfx->dirty & |
| #if GFX_VERx10 >= 125 |
| ANV_CMD_DIRTY_PIPELINE |
| #else |
| (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) |
| #endif |
| ) |
| update_ps_extra_has_uav(hw_state, gfx, pipeline); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) |
| update_ps_extra_kills_pixel(hw_state, dyn, gfx, pipeline); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM)) |
| update_streamout(hw_state, dyn, gfx, pipeline); |
| |
| if ( |
| #if GFX_VERx10 >= 200 |
| /* Xe2+ might need to update this if the FS changed */ |
| (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| #endif |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) |
| update_provoking_vertex(hw_state, dyn, pipeline); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) |
| update_topology(hw_state, dyn, pipeline); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); |
| |
| #if GFX_VER >= 11 |
| if (device->vk.enabled_extensions.KHR_fragment_shading_rate && |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) |
| update_cps(hw_state, device, dyn, pipeline); |
| #endif /* GFX_VER >= 11 */ |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) |
| update_te(hw_state, dyn, pipeline); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) |
| update_line_width(hw_state, dyn); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) |
| update_sf_global_depth_bias(hw_state, dyn); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) |
| update_clip_api_mode(hw_state, dyn); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT)) |
| update_clip_max_viewport(hw_state, dyn); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE)) |
| update_clip_raster(hw_state, dyn, gfx, pipeline); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES)) |
| update_multisample(hw_state, dyn); |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) |
| update_sample_mask(hw_state, dyn); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || |
| #if GFX_VER == 9 |
| /* For the PMA fix */ |
| (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| #endif |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) |
| update_wm_depth_stencil(hw_state, dyn, gfx, device); |
| |
| #if GFX_VER >= 12 |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) |
| update_depth_bounds(hw_state, dyn); |
| #endif |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE)) |
| update_line_stipple(hw_state, dyn); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_INDEX_TYPE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) |
| update_vf_restart(hw_state, dyn, gfx); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER) || |
| (gfx->dirty & ANV_CMD_DIRTY_INDEX_TYPE)) |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER); |
| |
| #if GFX_VERx10 >= 125 |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) |
| update_vfg_list_cut_index(hw_state, dyn); |
| #endif |
| |
| if (device->vk.enabled_extensions.EXT_sample_locations && |
| (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| update_blend_state(hw_state, dyn, gfx, device, |
| wm_prog_data != NULL, |
| wm_prog_data != NULL ? |
| wm_prog_data->dual_src_blend : false); |
| } |
| |
| if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) |
| update_blend_constants(hw_state, dyn, gfx); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLAMP_RANGE)) |
| update_viewports(hw_state, dyn, gfx, device); |
| |
| if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS)) |
| update_scissors(hw_state, dyn, gfx, cmd_buffer_level); |
| |
| #if GFX_VERx10 == 125 |
| if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS)) |
| update_tbimr_info(hw_state, device, gfx, pipeline->base.base.l3_config); |
| #endif |
| |
| #if INTEL_WA_14018283232_GFX_VER |
| if (intel_needs_workaround(device->info, 14018283232) && |
| ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))) { |
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| SET(WA_14018283232, wa_14018283232_toggle, |
| dyn->ds.depth.bounds_test.enable && |
| wm_prog_data && |
| wm_prog_data->uses_kill); |
| } |
| #endif |
| |
| /* If the pipeline uses a dynamic value of patch_control_points and |
| * either the pipeline or the dynamic value changed, check the value |
| * and reemit if needed. |
| */ |
| const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline); |
| if (tcs_prog_data && tcs_prog_data->input_vertices == 0 && |
| ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || |
| BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))) |
| SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points); |
| } |
| |
| #undef GET |
| #undef SET |
| #undef SET_STAGE |
| #undef SETUP_PROVOKING_VERTEX |
| |
| /** |
| * Entry point taking the vulkan runtime values & dirty states, updating |
| * the values in anv_gfx_dynamic_state and flagging HW instructions for |
| * reemission if the values are changing, then clearing the Vulkan |
| * runtime dirty states. |
| * |
| * Nothing is emitted in the batch buffer. |
| */ |
| void |
| genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer) |
| { |
| cmd_buffer_flush_gfx_runtime_state( |
| &cmd_buffer->state.gfx.dyn_state, |
| cmd_buffer->device, |
| &cmd_buffer->vk.dynamic_graphics_state, |
| &cmd_buffer->state.gfx, |
| anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline), |
| cmd_buffer->vk.level); |
| |
| vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state); |
| } |
| |
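| /* Emit a minimal pipeline setup followed by one dummy 3DPRIMITIVE per |
| * slice; clipping is set to CLIPMODE_REJECT_ALL so nothing is actually |
| * rasterized. |
| */ |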
| static void |
| emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer) |
| { |
| /* For Wa_16012775297, ensure VF_STATISTICS is emitted before 3DSTATE_VF |
| */ |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero); |
| #if GFX_VERx10 >= 125 |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) { |
| vfg.DistributionMode = RR_STRICT; |
| } |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) { |
| vf.GeometryDistributionEnable = |
| cmd_buffer->device->physical->instance->enable_vf_distribution; |
| } |
| #endif |
| |
| #if GFX_VER >= 12 |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) { |
| pr.ReplicaMask = 1; |
| } |
| #endif |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) { |
| rr.CullMode = CULLMODE_NONE; |
| rr.FrontFaceFillMode = FILL_MODE_SOLID; |
| rr.BackFaceFillMode = FILL_MODE_SOLID; |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero); |
| |
| #if GFX_VER >= 11 |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero); |
| #endif |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) { |
| clip.ClipEnable = true; |
| clip.ClipMode = CLIPMODE_REJECT_ALL; |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero); |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero); |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero); |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero); |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero); |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero); |
| |
| uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2, |
| GENX(3DSTATE_VERTEX_ELEMENTS)); |
| uint32_t *ve_pack_dest = &vertex_elements[1]; |
| |
| for (int i = 0; i < 2; i++) { |
| struct GENX(VERTEX_ELEMENT_STATE) element = { |
| .Valid = true, |
| .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, |
| .Component0Control = VFCOMP_STORE_0, |
| .Component1Control = VFCOMP_STORE_0, |
| .Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP, |
| .Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP, |
| }; |
| GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element); |
| ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { |
| topo.PrimitiveTopologyType = _3DPRIM_TRILIST; |
| } |
| |
| /* Emit a dummy draw per slice. */ |
| for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { |
| prim.VertexCountPerInstance = 3; |
| prim.PrimitiveTopologyType = _3DPRIM_TRILIST; |
| prim.InstanceCount = 1; |
| prim.VertexAccessType = SEQUENTIAL; |
| } |
| } |
| } |
| |
| #if INTEL_WA_14018283232_GFX_VER |
| void |
| genX(batch_emit_wa_14018283232)(struct anv_batch *batch) |
| { |
| anv_batch_emit(batch, GENX(RESOURCE_BARRIER), barrier) { |
| barrier.ResourceBarrierBody = (struct GENX(RESOURCE_BARRIER_BODY)) { |
| .BarrierType = RESOURCE_BARRIER_TYPE_IMMEDIATE, |
| .SignalStage = RESOURCE_BARRIER_STAGE_COLOR, |
| .WaitStage = RESOURCE_BARRIER_STAGE_PIXEL, |
| }; |
| } |
| } |
| #endif |
| |
| /** |
| * This function handles dirty state emission to the batch buffer. |
| */ |
| static void |
| cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_device *device = cmd_buffer->device; |
| struct anv_instance *instance = device->physical->instance; |
| struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; |
| struct anv_graphics_pipeline *pipeline = |
| anv_pipeline_to_graphics(gfx->base.pipeline); |
| const struct vk_dynamic_graphics_state *dyn = |
| &cmd_buffer->vk.dynamic_graphics_state; |
| struct anv_push_constants *push_consts = |
| &cmd_buffer->state.gfx.base.push_constants; |
| struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state; |
| const bool protected = cmd_buffer->vk.pool->flags & |
| VK_COMMAND_POOL_CREATE_PROTECTED_BIT; |
| |
| #define DEBUG_SHADER_HASH(stage) do { \ |
| if (unlikely( \ |
| (instance->debug & ANV_DEBUG_SHADER_HASH) && \ |
| anv_pipeline_has_stage(pipeline, stage))) { \ |
| mi_store(&b, \ |
| mi_mem32(device->workaround_address), \ |
| mi_imm(pipeline->base.shaders[stage]-> \ |
| prog_data->source_hash)); \ |
| } \ |
| } while (0) |
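| /* With ANV_DEBUG_SHADER_HASH enabled, the macro above MI-stores the |
| * shader's source_hash into the workaround address, presumably so that |
| * inspecting that address reveals the last programmed shader. |
| */ |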
| |
| struct mi_builder b; |
| if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) { |
| mi_builder_init(&b, device->info, &cmd_buffer->batch); |
| mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false)); |
| } |
| |
| #if INTEL_WA_16011107343_GFX_VER |
| /* Will be emitted in front of every draw instead */ |
| if (intel_needs_workaround(device->info, 16011107343) && |
| anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS); |
| #endif |
| |
| #if INTEL_WA_22018402687_GFX_VER |
| /* Will be emitted in front of every draw instead */ |
| if (intel_needs_workaround(device->info, 22018402687) && |
| anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS); |
| #endif |
| |
| /* |
| * Values provided by push constants |
| */ |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) { |
| push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points; |
| cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; |
| gfx->base.push_constants_data_dirty = true; |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { |
| push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags; |
| cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; |
| gfx->base.push_constants_data_dirty = true; |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) { |
| if (genX(need_wa_16014912113)(&gfx->urb_cfg, &pipeline->urb_cfg)) { |
| genX(batch_emit_wa_16014912113)(&cmd_buffer->batch, |
| &gfx->urb_cfg); |
| } |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb); |
| |
| memcpy(&gfx->urb_cfg, &pipeline->urb_cfg, |
| sizeof(struct intel_urb_config)); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs); |
| |
| #if GFX_VER >= 11 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2); |
| #endif |
| |
| if (device->physical->instance->vf_component_packing && |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_COMPONENT_PACKING)) { |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, |
| final.vf_component_packing); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) { |
| DEBUG_SHADER_HASH(MESA_SHADER_VERTEX); |
| anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline, |
| final.vs, protected); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) { |
| DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL); |
| anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline, |
| final.hs, protected); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) { |
| DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL); |
| anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline, |
| final.ds, protected); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vfs) { |
| vfs.StatisticsEnable = true; |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) { |
| /* Wa_16011773973: |
| * If SOL is enabled and SO_DECL state has to be programmed, |
| * 1. Send 3D State SOL state with SOL disabled |
| * 2. Send SO_DECL NP state |
| * 3. Send 3D State SOL with SOL Enabled |
| */ |
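| /* Step 1 is the zeroed 3DSTATE_STREAMOUT just below, step 2 the |
| * so_decl_list emission, and step 3 happens through |
| * ANV_GFX_STATE_STREAMOUT being re-dirtied in |
| * genX(cmd_buffer_flush_gfx_hw_state). |
| */ |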
| if (intel_needs_workaround(device->info, 16011773973) && |
| pipeline->uses_xfb) |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so); |
| |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, |
| final.so_decl_list); |
| |
| #if GFX_VER >= 11 && GFX_VER < 20 |
| /* ICL PRMs, Volume 2a - Command Reference: Instructions, |
| * 3DSTATE_SO_DECL_LIST: |
| * |
| * "Workaround: This command must be followed by a PIPE_CONTROL with |
| * CS Stall bit set." |
| * |
| * On DG2+ also known as Wa_1509820217. |
| */ |
| genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_CS_STALL_BIT); |
| #endif |
| } |
| |
| if (device->vk.enabled_extensions.EXT_mesh_shader) { |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) { |
| anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline, |
| final.mesh_control, protected); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) { |
| anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline, |
| final.task_control, protected); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh); |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH)) |
| anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh); |
| } else { |
| assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) && |
| !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH)); |
| } |
| |
| #define INIT(category, name) \ |
| .name = hw_state->category.name |
| #define SET(s, category, name) \ |
| s.name = hw_state->category.name |
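| /* INIT() expands to a designated initializer pulling a field out of |
| * hw_state for structs passed to GENX(*_pack)(), while SET() assigns |
| * the same field into an instruction opened with anv_batch_emit*(). |
| */ |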
| |
| /* Now the potentially dynamic instructions */ |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) { |
| DEBUG_SHADER_HASH(MESA_SHADER_FRAGMENT); |
| anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS), |
| pipeline, partial.ps, ps, protected) { |
| SET(ps, ps, KernelStartPointer0); |
| SET(ps, ps, KernelStartPointer1); |
| SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0); |
| SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1); |
| |
| #if GFX_VER < 20 |
| SET(ps, ps, KernelStartPointer2); |
| SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2); |
| |
| SET(ps, ps, _8PixelDispatchEnable); |
| SET(ps, ps, _16PixelDispatchEnable); |
| SET(ps, ps, _32PixelDispatchEnable); |
| #else |
| SET(ps, ps, Kernel0Enable); |
| SET(ps, ps, Kernel1Enable); |
| SET(ps, ps, Kernel0SIMDWidth); |
| SET(ps, ps, Kernel1SIMDWidth); |
| SET(ps, ps, Kernel0PolyPackingPolicy); |
| SET(ps, ps, Kernel0MaximumPolysperThread); |
| #endif |
| SET(ps, ps, PositionXYOffsetSelect); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) || |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA), |
| pipeline, partial.ps_extra, pse) { |
| SET(pse, ps_extra, PixelShaderHasUAV); |
| SET(pse, ps_extra, PixelShaderIsPerSample); |
| #if GFX_VER >= 11 |
| SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); |
| #endif |
| SET(pse, ps_extra, PixelShaderKillsPixel); |
| |
| #if INTEL_WA_18038825448_GFX_VER |
| /* Add a dependency if either the shader needs it (because of a runtime |
| * change through a pre-rasterization shader) or if we notice a change. |
| */ |
| pse.EnablePSDependencyOnCPsizeChange = |
| hw_state->ps_extra.EnablePSDependencyOnCPsizeChange || |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE); |
| #elif GFX_VERx10 >= 125 |
| SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange); |
| #endif |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP), |
| pipeline, partial.clip, clip) { |
| SET(clip, clip, APIMode); |
| SET(clip, clip, ViewportXYClipTestEnable); |
| SET(clip, clip, TriangleStripListProvokingVertexSelect); |
| SET(clip, clip, LineStripListProvokingVertexSelect); |
| SET(clip, clip, TriangleFanProvokingVertexSelect); |
| #if GFX_VERx10 >= 200 |
| SET(clip, clip, TriangleStripOddProvokingVertexSelect); |
| #endif |
| SET(clip, clip, MaximumVPIndex); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) { |
| genX(streamout_prologue)(cmd_buffer); |
| |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), |
| pipeline, partial.so, so) { |
| SET(so, so, RenderingDisable); |
| SET(so, so, RenderStreamSelect); |
| SET(so, so, ReorderMode); |
| SET(so, so, ForceRendering); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) { |
| struct anv_state sf_clip_state = |
| anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, |
| hw_state->vp_sf_clip.count * 64, 64); |
| |
| for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) { |
| struct GENX(SF_CLIP_VIEWPORT) sfv = { |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00), |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11), |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22), |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30), |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31), |
| INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32), |
| INIT(vp_sf_clip.elem[i], XMinClipGuardband), |
| INIT(vp_sf_clip.elem[i], XMaxClipGuardband), |
| INIT(vp_sf_clip.elem[i], YMinClipGuardband), |
| INIT(vp_sf_clip.elem[i], YMaxClipGuardband), |
| INIT(vp_sf_clip.elem[i], XMinViewPort), |
| INIT(vp_sf_clip.elem[i], XMaxViewPort), |
| INIT(vp_sf_clip.elem[i], YMinViewPort), |
| INIT(vp_sf_clip.elem[i], YMaxViewPort), |
| }; |
| GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, |
| GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { |
| clip.SFClipViewportPointer = sf_clip_state.offset; |
| } |
| } |
| |
| /* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming |
| * 3DSTATE_VIEWPORT_STATE_POINTERS_CC : |
| * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647 |
| */ |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || |
| (GFX_VER == 9 && |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) { |
| hw_state->vp_cc.state = |
| anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, |
| hw_state->vp_cc.count * 8, 32); |
| |
| for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) { |
| struct GENX(CC_VIEWPORT) cc_viewport = { |
| INIT(vp_cc.elem[i], MinimumDepth), |
| INIT(vp_cc.elem[i], MaximumDepth), |
| }; |
| GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8, |
| &cc_viewport); |
| } |
| |
| /* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below |
| */ |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) { |
| anv_batch_emit(&cmd_buffer->batch, |
| GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { |
| cc.CCViewportPointer = hw_state->vp_cc.state.offset; |
| } |
| cmd_buffer->state.gfx.viewport_set = true; |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) { |
| /* Wa_1409725701: |
| * |
| * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is |
| * stored as an array of up to 16 elements. The location of first |
| * element of the array, as specified by Pointer to SCISSOR_RECT, |
| * should be aligned to a 64-byte boundary." |
| */ |
| struct anv_state scissor_state = |
| anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, |
| hw_state->scissor.count * 8, 64); |
| |
| for (uint32_t i = 0; i < hw_state->scissor.count; i++) { |
| struct GENX(SCISSOR_RECT) scissor = { |
| INIT(scissor.elem[i], ScissorRectangleYMin), |
| INIT(scissor.elem[i], ScissorRectangleXMin), |
| INIT(scissor.elem[i], ScissorRectangleYMax), |
| INIT(scissor.elem[i], ScissorRectangleXMax), |
| }; |
| GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor); |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, |
| GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) { |
| ssp.ScissorRectPointer = scissor_state.offset; |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { |
| SET(vft, vft, PrimitiveTopologyType); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) { |
| genX(batch_emit_vertex_input)(&cmd_buffer->batch, device, |
| pipeline, dyn->vi); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE), |
| pipeline, partial.te, te) { |
| SET(te, te, OutputTopology); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) { |
| DEBUG_SHADER_HASH(MESA_SHADER_GEOMETRY); |
| anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS), |
| pipeline, partial.gs, gs, protected) { |
| SET(gs, gs, ReorderMode); |
| } |
| } |
| |
| #if GFX_VER >= 30 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) { |
| coarse_pixel.DisableCPSPointers = true; |
| SET(coarse_pixel, coarse_pixel, CPSizeX); |
| SET(coarse_pixel, coarse_pixel, CPSizeY); |
| SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode); |
| SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode); |
| } |
| } |
| #else |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) { |
| #if GFX_VER == 11 |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) { |
| SET(cps, cps, CoarsePixelShadingMode); |
| SET(cps, cps, MinCPSizeX); |
| SET(cps, cps, MinCPSizeY); |
| } |
| #elif GFX_VER >= 12 |
| /* TODO: we can optimize this flush in the following cases: |
| * |
| * In the case where the last geometry shader emits a value that is |
| * not constant, we can avoid this stall because we can synchronize |
| * the pixel shader internally with |
| * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange. |
| * |
| * If we know that the previous pipeline and the current one are |
| * using the same fragment shading rate. |
| */ |
| anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { |
| #if GFX_VERx10 >= 125 |
| pc.PSSStallSyncEnable = true; |
| #else |
| pc.PSDSyncEnable = true; |
| #endif |
| } |
| |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) { |
| SET(cps, cps, CoarsePixelShadingStateArrayPointer); |
| } |
| #endif |
| } |
| #endif /* GFX_VER >= 30 */ |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF), |
| pipeline, partial.sf, sf) { |
| SET(sf, sf, LineWidth); |
| SET(sf, sf, TriangleStripListProvokingVertexSelect); |
| SET(sf, sf, LineStripListProvokingVertexSelect); |
| SET(sf, sf, TriangleFanProvokingVertexSelect); |
| #if GFX_VERx10 >= 200 |
| SET(sf, sf, TriangleStripOddProvokingVertexSelect); |
| #endif |
| SET(sf, sf, LegacyGlobalDepthBiasEnable); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), raster) { |
| /* For details on 3DSTATE_RASTER multisample state, see the BSpec |
| * table "Multisample Modes State". |
| * |
| * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix |
| * computations. If we ever set this bit to a different value, they |
| * will need to be updated accordingly. |
| */ |
| raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0; |
| raster.ForceMultisampling = false; |
| raster.ScissorRectangleEnable = true; |
| |
| SET(raster, raster, APIMode); |
| SET(raster, raster, DXMultisampleRasterizationEnable); |
| SET(raster, raster, AntialiasingEnable); |
| SET(raster, raster, CullMode); |
| SET(raster, raster, FrontWinding); |
| SET(raster, raster, GlobalDepthOffsetEnableSolid); |
| SET(raster, raster, GlobalDepthOffsetEnableWireframe); |
| SET(raster, raster, GlobalDepthOffsetEnablePoint); |
| SET(raster, raster, GlobalDepthOffsetConstant); |
| SET(raster, raster, GlobalDepthOffsetScale); |
| SET(raster, raster, GlobalDepthOffsetClamp); |
| SET(raster, raster, FrontFaceFillMode); |
| SET(raster, raster, BackFaceFillMode); |
| SET(raster, raster, ViewportZFarClipTestEnable); |
| SET(raster, raster, ViewportZNearClipTestEnable); |
| SET(raster, raster, ConservativeRasterizationEnable); |
| #if GFX_VERx10 >= 200 |
| SET(raster, raster, LegacyBaryAssignmentDisable); |
| #endif |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MULTISAMPLE), ms) { |
| ms.PixelLocation = CENTER; |
| |
| /* The PRM says that this bit is valid only for DX9: |
| * |
| * SW can choose to set this bit only for DX9 API. DX10/OGL API's |
| * should not have any effect by setting or not setting this bit. |
| */ |
| ms.PixelPositionOffsetEnable = false; |
| |
| SET(ms, ms, NumberofMultisamples); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) { |
| hw_state->cc.state = |
| anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, |
| GENX(COLOR_CALC_STATE_length) * 4, |
| 64); |
| struct GENX(COLOR_CALC_STATE) cc = { |
| INIT(cc, BlendConstantColorRed), |
| INIT(cc, BlendConstantColorGreen), |
| INIT(cc, BlendConstantColorBlue), |
| INIT(cc, BlendConstantColorAlpha), |
| }; |
| GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc); |
| |
| /* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below |
| */ |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { |
| ccp.ColorCalcStatePointer = hw_state->cc.state.offset; |
| ccp.ColorCalcStatePointerValid = true; |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) { |
| SET(sm, sm, SampleMask); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { |
| SET(ds, ds, DoubleSidedStencilEnable); |
| SET(ds, ds, StencilTestMask); |
| SET(ds, ds, StencilWriteMask); |
| SET(ds, ds, BackfaceStencilTestMask); |
| SET(ds, ds, BackfaceStencilWriteMask); |
| SET(ds, ds, StencilReferenceValue); |
| SET(ds, ds, BackfaceStencilReferenceValue); |
| SET(ds, ds, DepthTestEnable); |
| SET(ds, ds, DepthBufferWriteEnable); |
| SET(ds, ds, DepthTestFunction); |
| SET(ds, ds, StencilTestEnable); |
| SET(ds, ds, StencilBufferWriteEnable); |
| SET(ds, ds, StencilFailOp); |
| SET(ds, ds, StencilPassDepthPassOp); |
| SET(ds, ds, StencilPassDepthFailOp); |
| SET(ds, ds, StencilTestFunction); |
| SET(ds, ds, BackfaceStencilFailOp); |
| SET(ds, ds, BackfaceStencilPassDepthPassOp); |
| SET(ds, ds, BackfaceStencilPassDepthFailOp); |
| SET(ds, ds, BackfaceStencilTestFunction); |
| } |
| } |
| |
| #if GFX_VER >= 12 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) { |
| SET(db, db, DepthBoundsTestEnable); |
| SET(db, db, DepthBoundsTestMinValue); |
| SET(db, db, DepthBoundsTestMaxValue); |
| } |
| } |
| #endif |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) { |
| SET(ls, ls, LineStipplePattern); |
| SET(ls, ls, LineStippleInverseRepeatCount); |
| SET(ls, ls, LineStippleRepeatCount); |
| } |
| #if GFX_VER >= 11 |
| /* ICL PRMs, Volume 2a - Command Reference: Instructions, |
| * 3DSTATE_LINE_STIPPLE: |
| * |
| * "Workaround: This command must be followed by a PIPE_CONTROL with |
| * CS Stall bit set." |
| */ |
| genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_CS_STALL_BIT); |
| #endif |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) { |
| #if GFX_VERx10 >= 125 |
| vf.GeometryDistributionEnable = |
| device->physical->instance->enable_vf_distribution; |
| #endif |
| vf.ComponentPackingEnable = |
| device->physical->instance->vf_component_packing; |
| SET(vf, vf, IndexedDrawCutIndexEnable); |
| SET(vf, vf, CutIndex); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { |
| ib.IndexFormat = vk_to_intel_index_type(gfx->index_type); |
| ib.MOCS = gfx->index_addr == 0 ? |
| anv_mocs(cmd_buffer->device, NULL, ISL_SURF_USAGE_INDEX_BUFFER_BIT) : |
| gfx->index_mocs; |
| #if GFX_VER >= 12 |
| ib.L3BypassDisable = true; |
| #endif |
| ib.BufferStartingAddress = anv_address_from_u64(gfx->index_addr); |
| ib.BufferSize = gfx->index_size; |
| } |
| } |
| |
| #if GFX_VERx10 >= 125 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG), |
| pipeline, partial.vfg, vfg) { |
| SET(vfg, vfg, ListCutIndexEnable); |
| } |
| } |
| #endif |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) { |
| genX(emit_sample_pattern)(&cmd_buffer->batch, |
| dyn->ms.sample_locations_enable ? |
| dyn->ms.sample_locations : NULL); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) { |
| anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM), |
| pipeline, partial.wm, wm) { |
| SET(wm, wm, LineStippleEnable); |
| SET(wm, wm, BarycentricInterpolationMode); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) { |
| SET(blend, ps_blend, HasWriteableRT); |
| SET(blend, ps_blend, ColorBufferBlendEnable); |
| SET(blend, ps_blend, SourceAlphaBlendFactor); |
| SET(blend, ps_blend, DestinationAlphaBlendFactor); |
| SET(blend, ps_blend, SourceBlendFactor); |
| SET(blend, ps_blend, DestinationBlendFactor); |
| SET(blend, ps_blend, AlphaTestEnable); |
| SET(blend, ps_blend, IndependentAlphaBlendEnable); |
| SET(blend, ps_blend, AlphaToCoverageEnable); |
| } |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) { |
| const uint32_t num_dwords = GENX(BLEND_STATE_length) + |
| GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; |
| hw_state->blend.state = |
| anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, |
| num_dwords * 4, |
| 64); |
| |
| uint32_t *dws = hw_state->blend.state.map; |
| |
| struct GENX(BLEND_STATE) blend_state = { |
| INIT(blend, AlphaToCoverageEnable), |
| INIT(blend, AlphaToOneEnable), |
| INIT(blend, IndependentAlphaBlendEnable), |
| INIT(blend, ColorDitherEnable), |
| }; |
| GENX(BLEND_STATE_pack)(NULL, dws, &blend_state); |
| |
| /* Jump to blend entries. */ |
| dws += GENX(BLEND_STATE_length); |
| for (uint32_t i = 0; i < MAX_RTS; i++) { |
| struct GENX(BLEND_STATE_ENTRY) entry = { |
| INIT(blend.rts[i], WriteDisableAlpha), |
| INIT(blend.rts[i], WriteDisableRed), |
| INIT(blend.rts[i], WriteDisableGreen), |
| INIT(blend.rts[i], WriteDisableBlue), |
| INIT(blend.rts[i], LogicOpFunction), |
| INIT(blend.rts[i], LogicOpEnable), |
| INIT(blend.rts[i], ColorBufferBlendEnable), |
| INIT(blend.rts[i], ColorClampRange), |
| #if GFX_VER >= 30 |
| INIT(blend.rts[i], SimpleFloatBlendEnable), |
| #endif |
| INIT(blend.rts[i], PreBlendColorClampEnable), |
| INIT(blend.rts[i], PostBlendColorClampEnable), |
| INIT(blend.rts[i], SourceBlendFactor), |
| INIT(blend.rts[i], DestinationBlendFactor), |
| INIT(blend.rts[i], ColorBlendFunction), |
| INIT(blend.rts[i], SourceAlphaBlendFactor), |
| INIT(blend.rts[i], DestinationAlphaBlendFactor), |
| INIT(blend.rts[i], AlphaBlendFunction), |
| }; |
| |
| GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry); |
| dws += GENX(BLEND_STATE_ENTRY_length); |
| } |
| |
| /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */ |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR); |
| } |
| |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { |
| bsp.BlendStatePointer = hw_state->blend.state.offset; |
| bsp.BlendStatePointerValid = true; |
| } |
| } |
| |
| #if INTEL_WA_18019816803_GFX_VER |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) { |
| genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_PSS_STALL_SYNC_BIT); |
| } |
| #endif |
| |
| #if INTEL_WA_14018283232_GFX_VER |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232)) |
| genX(batch_emit_wa_14018283232)(&cmd_buffer->batch); |
| #endif |
| |
| #if GFX_VER == 9 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX)) |
| genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix); |
| #endif |
| |
| #if GFX_VERx10 >= 125 |
| if (hw_state->use_tbimr && |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) { |
| anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO), |
| tbimr) { |
| SET(tbimr, tbimr, TileRectangleHeight); |
| SET(tbimr, tbimr, TileRectangleWidth); |
| SET(tbimr, tbimr, VerticalTileCount); |
| SET(tbimr, tbimr, HorizontalTileCount); |
| SET(tbimr, tbimr, TBIMRBatchSize); |
| SET(tbimr, tbimr, TileBoxCheck); |
| } |
| } |
| #endif |
| |
| #undef INIT |
| #undef SET |
| #undef DEBUG_SHADER_HASH |
| |
| BITSET_ZERO(hw_state->dirty); |
| } |
| |
| /** |
| * This function handles possible state workarounds and emits the dirty |
| * instructions to the batch buffer. |
| */ |
| void |
| genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_device *device = cmd_buffer->device; |
| struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; |
| struct anv_graphics_pipeline *pipeline = |
| anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline); |
| struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state; |
| |
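| /* With INTEL_DEBUG=reemit, consider all state dirty so that every
|  * instruction is reemitted on each flush.
|  */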
| if (INTEL_DEBUG(DEBUG_REEMIT)) { |
| BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty, |
| device->gfx_dirty_state); |
| } |
| |
| /** |
| * Put potential workarounds here if you need to reemit an instruction |
| * because another one is changing.
| */ |
| |
| /* Reprogram SF_CLIP & CC_STATE together. This reproduces the programming |
| * done on Windows drivers. Fixes flickering issues with multiple |
| * workloads. |
| * |
| * Since blorp disables 3DSTATE_CLIP::ClipEnable and dirties CC_STATE, this |
| * also takes care of Wa_14016820455 which requires SF_CLIP to be |
| * reprogrammed whenever 3DSTATE_CLIP::ClipEnable is enabled. |
| */ |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) { |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR); |
| } |
| |
| /* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */ |
| #if INTEL_WA_16012775297_GFX_VER |
| if (intel_needs_workaround(device->info, 16012775297) && |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); |
| #endif |
| |
| /* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
|  * it afterwards.
|  */
| if (intel_needs_workaround(device->info, 16011773973) && |
| pipeline->uses_xfb && |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) { |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); |
| } |
| |
| #if INTEL_WA_18038825448_GFX_VER |
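| /* Wa_18038825448: track whether the current fragment shader runs at
|  * coarse pixel rate.
|  */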
| const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); |
| if (wm_prog_data) { |
| genX(cmd_buffer_set_coarse_pixel_active)( |
| cmd_buffer, |
| brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags)); |
| } |
| #endif |
| |
| /* Gfx11 undocumented issue:
| * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781 |
| */ |
| #if GFX_VER == 11 |
| if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE); |
| #endif |
| |
| /* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */ |
| if (intel_needs_workaround(device->info, 18020335297) && |
| (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || |
| BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) && |
| cmd_buffer->state.gfx.viewport_set) { |
| /* For mesh pipelines, we implement the WA using a CS stall. This keeps
|  * things simple and also covers the possible interaction with
|  * Wa_16014390852.
|  */
| if (anv_pipeline_is_mesh(pipeline)) { |
| genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, |
| _3D, ANV_PIPE_CS_STALL_BIT); |
| } else { |
| /* Mask off all the instructions that the WA dummy draw below programs
|  * itself; they get redirtied afterwards so the real state is restored.
|  */
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); |
| |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE); |
| BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS); |
| |
| cmd_buffer_gfx_state_emission(cmd_buffer); |
| |
| emit_wa_18020335297_dummy_draw(cmd_buffer); |
| |
| /* Redirty all the state the WA dummy draw emitted so that the
|  * current real state gets restored.
|  */
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); |
| |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE); |
| BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS); |
| } |
| } |
| |
| cmd_buffer_gfx_state_emission(cmd_buffer); |
| } |
| |
| void |
| genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) |
| { |
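| /* The PMA fix only concerns the 3D pipeline, which lives on the render
|  * engine.
|  */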
| if (!anv_cmd_buffer_is_render_queue(cmd_buffer)) |
| return; |
| |
| if (cmd_buffer->state.gfx.pma_fix_enabled == enable) |
| return; |
| |
| cmd_buffer->state.gfx.pma_fix_enabled = enable; |
| |
| /* According to the Broadwell PIPE_CONTROL documentation, software should |
| * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set |
| * prior to the LRI. If stencil buffer writes are enabled, then a Render |
| * Cache Flush is also necessary. |
| * |
| * The Skylake docs say to use a depth stall rather than a command |
| * streamer stall. However, the hardware seems to violently disagree. |
| * A full command streamer stall seems to be needed in both cases. |
| */ |
| genx_batch_emit_pipe_control |
| (&cmd_buffer->batch, cmd_buffer->device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | |
| ANV_PIPE_CS_STALL_BIT | |
| #if GFX_VER >= 12 |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT | |
| #endif |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); |
| |
| #if GFX_VER == 9 |
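| /* CACHE_MODE_0 is a masked register: the upper 16 bits select which of
|  * the lower 16 bits get written, hence the Mask field set along with
|  * the enable bit.
|  */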
| uint32_t cache_mode; |
| anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0), |
| .STCPMAOptimizationEnable = enable, |
| .STCPMAOptimizationEnableMask = true); |
| anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { |
| lri.RegisterOffset = GENX(CACHE_MODE_0_num); |
| lri.DataDWord = cache_mode; |
| } |
| #endif /* GFX_VER == 9 */
| |
| /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache |
| * Flush bits is often necessary. We do it regardless because it's easier. |
| * The render cache flush is also necessary if stencil writes are enabled. |
| * |
| * Again, the Skylake docs give a different set of flushes but the BDW |
| * flushes seem to work just as well. |
| */ |
| genx_batch_emit_pipe_control |
| (&cmd_buffer->batch, cmd_buffer->device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_DEPTH_STALL_BIT | |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | |
| #if GFX_VER >= 12 |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT | |
| #endif |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); |
| } |