| /* |
| * Copyright © 2011 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "intel_batchbuffer.h" |
| #include "intel_mipmap_tree.h" |
| #include "intel_regions.h" |
| #include "intel_fbo.h" |
| #include "brw_context.h" |
| #include "brw_state.h" |
| #include "brw_defines.h" |
| |
| static void emit_depthbuffer(struct brw_context *brw) |
| { |
| struct intel_context *intel = &brw->intel; |
| struct gl_context *ctx = &intel->ctx; |
| struct gl_framebuffer *fb = ctx->DrawBuffer; |
| |
| /* _NEW_BUFFERS */ |
| struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH); |
| struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL); |
| struct intel_mipmap_tree *depth_mt = NULL, |
| *stencil_mt = NULL, |
| *hiz_mt = NULL; |
| |
| /* Amount by which drawing should be offset in order to draw to the |
| * appropriate miplevel/zoffset/cubeface. We will extract these values |
| * from depth_irb or stencil_irb once we determine which is present. |
| */ |
| uint32_t draw_x = 0, draw_y = 0; |
| |
| /* Masks used to determine how much of the draw_x and draw_y offsets should |
| * be performed using the fine adjustment of "depth coordinate offset X/Y" |
| * (dw5 of 3DSTATE_DEPTH_BUFFER). Any remaining coarse adjustment will be |
| * performed by changing the base addresses of the buffers. |
| * |
| * Since the HiZ, depth, and stencil buffers all use the same "depth |
| * coordinate offset X/Y" values, we need to make sure that the coarse |
| * adjustment will be possible to apply to all three buffers. Since coarse |
| * adjustment can only be applied in multiples of the tile size, we will OR |
| * together the tile masks of all the buffers to determine which offsets to |
| * perform as fine adjustments. |
| */ |
| uint32_t tile_mask_x = 0, tile_mask_y = 0; |
| |
| if (drb) |
| depth_mt = drb->mt; |
| |
| if (depth_mt) { |
| hiz_mt = depth_mt->hiz_mt; |
| |
| intel_region_get_tile_masks(depth_mt->region, |
| &tile_mask_x, &tile_mask_y, false); |
| |
| if (hiz_mt) { |
| uint32_t hiz_tile_mask_x, hiz_tile_mask_y; |
| intel_region_get_tile_masks(hiz_mt->region, |
| &hiz_tile_mask_x, &hiz_tile_mask_y, |
| false); |
| |
| /* Each HiZ row represents 2 rows of pixels */ |
| hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1; |
| |
| tile_mask_x |= hiz_tile_mask_x; |
| tile_mask_y |= hiz_tile_mask_y; |
| } |
| } |
| |
| if (srb) { |
| stencil_mt = srb->mt; |
| if (stencil_mt->stencil_mt) |
| stencil_mt = stencil_mt->stencil_mt; |
| |
| assert(stencil_mt->format == MESA_FORMAT_S8); |
| |
| /* Stencil buffer uses 64x64 tiles. */ |
| tile_mask_x |= 63; |
| tile_mask_y |= 63; |
| } |
| |
| /* Gen7 doesn't support packed depth/stencil */ |
| assert(stencil_mt == NULL || depth_mt != stencil_mt); |
| assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format)); |
| |
| intel_emit_depth_stall_flushes(intel); |
| |
| if (depth_mt == NULL) { |
| uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18; |
| uint32_t dw3 = 0; |
| uint32_t tile_x = 0, tile_y = 0; |
| |
| if (stencil_mt == NULL) { |
| dw1 |= (BRW_SURFACE_NULL << 29); |
| } else { |
| /* _NEW_STENCIL: enable stencil buffer writes */ |
| dw1 |= ((ctx->Stencil.WriteMask != 0) << 27); |
| |
| draw_x = srb->draw_x; |
| draw_y = srb->draw_y; |
| tile_x = draw_x & tile_mask_x; |
| tile_y = draw_y & tile_mask_y; |
| |
| /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 |
| * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth |
| * Coordinate Offset X/Y": |
| * |
| * "The 3 LSBs of both offsets must be zero to ensure correct |
| * alignment" |
| * |
| * We have no guarantee that tile_x and tile_y are correctly aligned, |
| * since they are determined by the mipmap layout, which is only |
| * aligned to multiples of 4. |
| * |
| * So, to avoid hanging the GPU, just smash the low order 3 bits of |
| * tile_x and tile_y to 0. This is a temporary workaround until we |
| * come up with a better solution. |
| */ |
| tile_x &= ~7; |
| tile_y &= ~7; |
| |
| /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */ |
| dw1 |= (BRW_SURFACE_2D << 29); |
| dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) | |
| ((srb->Base.Base.Height + tile_y - 1) << 18); |
| } |
| |
| BEGIN_BATCH(7); |
| OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); |
| OUT_BATCH(dw1); |
| OUT_BATCH(0); |
| OUT_BATCH(dw3); |
| OUT_BATCH(0); |
| OUT_BATCH(tile_x | (tile_y << 16)); |
| OUT_BATCH(0); |
| ADVANCE_BATCH(); |
| } else { |
| struct intel_region *region = depth_mt->region; |
| uint32_t tile_x, tile_y, offset; |
| |
| draw_x = drb->draw_x; |
| draw_y = drb->draw_y; |
| tile_x = draw_x & tile_mask_x; |
| tile_y = draw_y & tile_mask_y; |
| |
| /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 |
| * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth |
| * Coordinate Offset X/Y": |
| * |
| * "The 3 LSBs of both offsets must be zero to ensure correct |
| * alignment" |
| * |
| * We have no guarantee that tile_x and tile_y are correctly aligned, |
| * since they are determined by the mipmap layout, which is only aligned |
| * to multiples of 4. |
| * |
| * So, to avoid hanging the GPU, just smash the low order 3 bits of |
| * tile_x and tile_y to 0. This is a temporary workaround until we come |
| * up with a better solution. |
| */ |
| tile_x &= ~7; |
| tile_y &= ~7; |
| |
| offset = intel_region_get_aligned_offset(region, |
| draw_x & ~tile_mask_x, |
| draw_y & ~tile_mask_y, |
| false); |
| |
| assert(region->tiling == I915_TILING_Y); |
| |
| /* _NEW_DEPTH, _NEW_STENCIL */ |
| BEGIN_BATCH(7); |
| OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); |
| OUT_BATCH(((region->pitch * region->cpp) - 1) | |
| (brw_depthbuffer_format(brw) << 18) | |
| ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */ |
| ((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) | |
| ((ctx->Depth.Mask != 0) << 28) | |
| (BRW_SURFACE_2D << 29)); |
| OUT_RELOC(region->bo, |
| I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, |
| offset); |
| OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) | |
| (((drb->Base.Base.Height + tile_y) - 1) << 18)); |
| OUT_BATCH(0); |
| OUT_BATCH(tile_x | (tile_y << 16)); |
| OUT_BATCH(0); |
| ADVANCE_BATCH(); |
| } |
| |
| if (hiz_mt == NULL) { |
| BEGIN_BATCH(3); |
| OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); |
| OUT_BATCH(0); |
| OUT_BATCH(0); |
| ADVANCE_BATCH(); |
| } else { |
| uint32_t hiz_offset = |
| intel_region_get_aligned_offset(hiz_mt->region, |
| draw_x & ~tile_mask_x, |
| (draw_y & ~tile_mask_y) / 2, |
| false); |
| BEGIN_BATCH(3); |
| OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); |
| OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1); |
| OUT_RELOC(hiz_mt->region->bo, |
| I915_GEM_DOMAIN_RENDER, |
| I915_GEM_DOMAIN_RENDER, |
| hiz_offset); |
| ADVANCE_BATCH(); |
| } |
| |
| if (stencil_mt == NULL) { |
| BEGIN_BATCH(3); |
| OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); |
| OUT_BATCH(0); |
| OUT_BATCH(0); |
| ADVANCE_BATCH(); |
| } else { |
| const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0; |
| |
| /* Note: We can't compute the stencil offset using |
| * intel_region_get_aligned_offset(), because the stencil region claims |
| * that the region is untiled; in fact it's W tiled. |
| */ |
| uint32_t stencil_offset = |
| (draw_y & ~tile_mask_y) * stencil_mt->region->pitch + |
| (draw_x & ~tile_mask_x) * 64; |
| |
| BEGIN_BATCH(3); |
| OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); |
| /* The stencil buffer has quirky pitch requirements. From the Graphics |
| * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing |
| * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+], |
| * field "Surface Pitch": |
| * |
| * The pitch must be set to 2x the value computed based on width, as |
| * the stencil buffer is stored with two rows interleaved. |
| * |
| * (Note that it is not 100% clear whether this intended to apply to |
| * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would |
| * imply that it doesn't), however the comment appears on a "DevIVB+" |
| * page (which would imply that it does). Experiments with the hardware |
| * indicate that it does. |
| */ |
| OUT_BATCH(enabled | |
| (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1)); |
| OUT_RELOC(stencil_mt->region->bo, |
| I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, |
| stencil_offset); |
| ADVANCE_BATCH(); |
| } |
| |
| BEGIN_BATCH(3); |
| OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); |
| OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0); |
| OUT_BATCH(1); |
| ADVANCE_BATCH(); |
| } |
| |
| /** |
| * \see brw_context.state.depth_region |
| */ |
| const struct brw_tracked_state gen7_depthbuffer = { |
| .dirty = { |
| .mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL), |
| .brw = BRW_NEW_BATCH, |
| .cache = 0, |
| }, |
| .emit = emit_depthbuffer, |
| }; |