blob: dd7b0d4cfe10fb181ca2a450a6278957d0ba625e [file] [log] [blame]
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
/**
 * Translate a swizzle selector (SWIZZLE_X, SWIZZLE_ONE, ...) into the
 * corresponding Gen7.5+ "Shader Channel Select" value (HSW_SCS_*).
 *
 * An unrecognized selector trips an assertion; when assertions are compiled
 * out the function falls back to HSW_SCS_ZERO.
 */
static unsigned
swizzle_to_scs(GLenum swizzle)
{
   unsigned scs = HSW_SCS_ZERO;

   switch (swizzle) {
   case SWIZZLE_X:
      scs = HSW_SCS_RED;
      break;
   case SWIZZLE_Y:
      scs = HSW_SCS_GREEN;
      break;
   case SWIZZLE_Z:
      scs = HSW_SCS_BLUE;
      break;
   case SWIZZLE_W:
      scs = HSW_SCS_ALPHA;
      break;
   case SWIZZLE_ZERO:
      scs = HSW_SCS_ZERO;
      break;
   case SWIZZLE_ONE:
      scs = HSW_SCS_ONE;
      break;
   default:
      assert(!"Should not get here: invalid swizzle mode");
      break;
   }

   return scs;
}
/**
 * Fill in the tiling fields of surface state dword 0 from an I915_TILING_*
 * value.
 *
 * \param surf    surface state to modify
 * \param tiling  I915_TILING_NONE, I915_TILING_X or I915_TILING_Y; any other
 *                value leaves both fields untouched
 */
void
gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling)
{
   if (tiling == I915_TILING_NONE) {
      /* Linear surface: not tiled, tile walk cleared. */
      surf->ss0.tiled_surface = 0;
      surf->ss0.tile_walk = 0;
   } else if (tiling == I915_TILING_X) {
      surf->ss0.tiled_surface = 1;
      surf->ss0.tile_walk = BRW_TILEWALK_XMAJOR;
   } else if (tiling == I915_TILING_Y) {
      surf->ss0.tiled_surface = 1;
      surf->ss0.tile_walk = BRW_TILEWALK_YMAJOR;
   }
}
/**
 * Program the multisample count and multisampled storage format in surface
 * state dword 4.
 *
 * \param surf         surface state to modify
 * \param num_samples  more than 4 selects MULTISAMPLECOUNT_8, 2..4 selects
 *                     MULTISAMPLECOUNT_4, anything else MULTISAMPLECOUNT_1
 * \param layout       INTEL_MSAA_LAYOUT_IMS selects MSFMT_DEPTH_STENCIL;
 *                     every other layout selects MSFMT_MSS
 */
void
gen7_set_surface_msaa(struct gen7_surface_state *surf, unsigned num_samples,
                      enum intel_msaa_layout layout)
{
   unsigned sample_count = GEN7_SURFACE_MULTISAMPLECOUNT_1;

   if (num_samples > 4)
      sample_count = GEN7_SURFACE_MULTISAMPLECOUNT_8;
   else if (num_samples > 1)
      sample_count = GEN7_SURFACE_MULTISAMPLECOUNT_4;

   surf->ss4.num_multisamples = sample_count;

   if (layout == INTEL_MSAA_LAYOUT_IMS) {
      surf->ss4.multisampled_surface_storage_format =
         GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
   } else {
      surf->ss4.multisampled_surface_storage_format =
         GEN7_SURFACE_MSFMT_MSS;
   }
}
void
gen7_set_surface_mcs_info(struct brw_context *brw,
struct gen7_surface_state *surf,
uint32_t surf_offset,
const struct intel_mipmap_tree *mcs_mt,
bool is_render_target)
{
/* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
*
* "The MCS surface must be stored as Tile Y."
*/
assert(mcs_mt->region->tiling == I915_TILING_Y);
/* Compute the pitch in units of tiles. To do this we need to divide the
* pitch in bytes by 128, since a single Y-tile is 128 bytes wide.
*/
unsigned pitch_bytes = mcs_mt->region->pitch * mcs_mt->cpp;
unsigned pitch_tiles = pitch_bytes / 128;
/* The upper 20 bits of surface state DWORD 6 are the upper 20 bits of the
* GPU address of the MCS buffer; the lower 12 bits contain other control
* information. Since buffer addresses are always on 4k boundaries (and
* thus have their lower 12 bits zero), we can use an ordinary reloc to do
* the necessary address translation.
*/
assert ((mcs_mt->region->bo->offset & 0xfff) == 0);
surf->ss6.mcs_enabled.mcs_enable = 1;
surf->ss6.mcs_enabled.mcs_surface_pitch = pitch_tiles - 1;
surf->ss6.mcs_enabled.mcs_base_address = mcs_mt->region->bo->offset >> 12;
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
surf_offset +
offsetof(struct gen7_surface_state, ss6),
mcs_mt->region->bo,
surf->ss6.raw_data & 0xfff,
is_render_target ? I915_GEM_DOMAIN_RENDER
: I915_GEM_DOMAIN_SAMPLER,
is_render_target ? I915_GEM_DOMAIN_RENDER : 0);
}
/**
 * Sanity-check the multisampling-related fields of a filled-in surface state
 * against the restrictions quoted below.
 *
 * Every check is an assert(), so nothing is verified when assertions are
 * compiled out.  The surface state is only read, never modified.
 *
 * \param surf              surface state to validate
 * \param is_render_target  true if the surface will be used as a render target
 */
void
gen7_check_surface_setup(struct gen7_surface_state *surf,
                         bool is_render_target)
{
   const unsigned sample_count = surf->ss4.num_multisamples;
   const unsigned storage_format =
      surf->ss4.multisampled_surface_storage_format;
   const bool is_multisampled =
      sample_count != GEN7_SURFACE_MULTISAMPLECOUNT_1;

   /* From the Graphics BSpec: vol5c Shared Functions [SNB+] > State >
    * SURFACE_STATE > SURFACE_STATE for most messages [DevIVB]: Surface Array
    * Spacing:
    *
    *   If Multisampled Surface Storage Format is MSFMT_MSS and Number of
    *   Multisamples is not MULTISAMPLECOUNT_1, this field must be set to
    *   ARYSPC_LOD0.
    */
   if (is_multisampled && storage_format == GEN7_SURFACE_MSFMT_MSS) {
      assert(surf->ss0.surface_array_spacing == GEN7_SURFACE_ARYSPC_LOD0);
   }

   /* Same BSpec section, Multisampled Surface Storage Format:
    *
    *   All multisampled render target surfaces must have this field set to
    *   MSFMT_MSS.
    *
    * But also:
    *
    *   This field is ignored if Number of Multisamples is MULTISAMPLECOUNT_1.
    */
   if (is_multisampled && is_render_target) {
      assert(storage_format == GEN7_SURFACE_MSFMT_MSS);
   }

   /* Same BSpec section, Multisampled Surface Storage Format:
    *
    *   If the surface's Number of Multisamples is MULTISAMPLECOUNT_8, Width
    *   is >= 8192 (meaning the actual surface width is >= 8193 pixels), this
    *   field must be set to MSFMT_MSS.
    */
   if (sample_count == GEN7_SURFACE_MULTISAMPLECOUNT_8 &&
       surf->ss2.width >= 8192) {
      assert(storage_format == GEN7_SURFACE_MSFMT_MSS);
   }

   /* Same BSpec section, Multisampled Surface Storage Format:
    *
    *   If the surface's Number of Multisamples is MULTISAMPLECOUNT_8,
    *   ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface's Number of
    *   Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is >
    *   8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. This field
    *   must be set to MSFMT_DEPTH_STENCIL if Surface Format is one of the
    *   following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or
    *   R24_UNORM_X8_TYPELESS.
    *
    * But also:
    *
    *   This field is ignored if Number of Multisamples is MULTISAMPLECOUNT_1.
    */
   const uint32_t layers = surf->ss3.depth + 1;
   const uint32_t rows = surf->ss2.height + 1;
   const uint32_t layer_rows = layers * rows;

   if (sample_count == GEN7_SURFACE_MULTISAMPLECOUNT_8 &&
       layer_rows > 4194304) {
      assert(storage_format == GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
   }
   if (sample_count == GEN7_SURFACE_MULTISAMPLECOUNT_4 &&
       layer_rows > 8388608) {
      assert(storage_format == GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
   }

   /* The format-based restriction only applies to multisampled surfaces. */
   if (!is_multisampled)
      return;

   switch (surf->ss0.surface_format) {
   case BRW_SURFACEFORMAT_I24X8_UNORM:
   case BRW_SURFACEFORMAT_L24X8_UNORM:
   case BRW_SURFACEFORMAT_A24X8_UNORM:
   case BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS:
      assert(storage_format == GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
      break;
   default:
      break;
   }
}
static void
gen7_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index)
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct gen7_surface_state *surf;
struct intel_buffer_object *intel_obj =
intel_buffer_object(tObj->BufferObject);
drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
gl_format format = tObj->_BufferObjectFormat;
int texel_size = _mesa_get_format_bytes(format);
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(*surf), 32, &binding_table[surf_index]);
memset(surf, 0, sizeof(*surf));
surf->ss0.surface_type = BRW_SURFACE_BUFFER;
surf->ss0.surface_format = brw_format_for_mesa_format(format);
surf->ss0.render_cache_read_write = 1;
if (surf->ss0.surface_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
_mesa_problem(NULL, "bad format %s for texture buffer\n",
_mesa_get_format_name(format));
}
if (bo) {
surf->ss1.base_addr = bo->offset; /* reloc */
/* Emit relocation to surface contents. Section 5.1.1 of the gen4
* bspec ("Data Cache") says that the data cache does not exist as
* a separate cache and is just the sampler cache.
*/
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
(binding_table[surf_index] +
offsetof(struct gen7_surface_state, ss1)),
bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
int w = intel_obj->Base.Size / texel_size;
surf->ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf->ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf->ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
surf->ss3.pitch = texel_size - 1;
} else {
surf->ss1.base_addr = 0;
surf->ss2.width = 0;
surf->ss2.height = 0;
surf->ss3.depth = 0;
surf->ss3.pitch = 0;
}
gen7_set_surface_tiling(surf, I915_TILING_NONE);
gen7_check_surface_setup(surf, false /* is_render_target */);
}
/**
 * Build the sampler surface state for the texture currently bound to texture
 * unit \p unit and record its batch offset in binding_table[surf_index].
 *
 * Buffer textures are handed off to gen7_update_buffer_texture_surface().
 * Multisampled miptrees are not supported here and are rejected by
 * assertion.
 *
 * \param ctx            GL context
 * \param unit           texture unit whose current texture is described
 * \param binding_table  binding table receiving the surface state offset
 * \param surf_index     slot in \p binding_table to fill
 */
static void
gen7_update_texture_surface(struct gl_context *ctx,
                            unsigned unit,
                            uint32_t *binding_table,
                            unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   struct gen7_surface_state *surf;
   uint32_t *surf_offset = &binding_table[surf_index];
   int width, height, depth;

   if (tObj->Target == GL_TEXTURE_BUFFER) {
      gen7_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   /* We don't support MSAA for textures. */
   assert(!mt->array_spacing_lod0);
   assert(mt->num_samples <= 1);

   struct intel_region *region = mt->region;

   intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          sizeof(*surf), 32, surf_offset);
   memset(surf, 0, sizeof(*surf));

   /* --- dword 0 --- */
   surf->ss0.vertical_alignment = (mt->align_h == 4);
   surf->ss0.horizontal_alignment = (mt->align_w == 8);

   surf->ss0.surface_type = translate_tex_target(tObj->Target);
   surf->ss0.surface_format = translate_tex_format(mt->format,
                                                   firstImage->InternalFormat,
                                                   tObj->DepthMode,
                                                   sampler->sRGBDecode);

   /* Cube maps enable all six faces. */
   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
      surf->ss0.cube_pos_x = 1;
      surf->ss0.cube_neg_x = 1;
      surf->ss0.cube_pos_y = 1;
      surf->ss0.cube_neg_y = 1;
      surf->ss0.cube_pos_z = 1;
      surf->ss0.cube_neg_z = 1;
   }

   /* Anything with more than one slice is an array, except 3D textures. */
   surf->ss0.is_array = depth > 1 && tObj->Target != GL_TEXTURE_3D;

   gen7_set_surface_tiling(surf, region->tiling);

   /* ss0 fields left at zero:
    * - vert_line_stride (exists on gen6 but we ignore it)
    * - vert_line_stride_ofs (exists on gen6 but we ignore it)
    * - surface_array_spacing
    * - render_cache_read_write (exists on gen6 but ignored here)
    */

   /* --- dwords 1-3: address and dimensions, each size stored minus one --- */
   surf->ss1.base_addr = region->bo->offset + mt->offset; /* reloc */

   surf->ss2.width = width - 1;
   surf->ss2.height = height - 1;

   surf->ss3.pitch = (region->pitch * mt->cpp) - 1;
   surf->ss3.depth = depth - 1;

   /* ss4: ignored? */

   /* --- dword 5 --- */
   surf->ss5.mip_count = intelObj->_MaxLevel - tObj->BaseLevel;
   surf->ss5.min_lod = 0;

   /* ss5 fields left at zero:
    * - x_offset (N/A for textures?)
    * - y_offset (ditto)
    * - cache_control
    */

   if (brw->intel.is_haswell) {
      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      const bool alpha_depth = tObj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);

      int swizzle;
      if (unlikely(alpha_depth))
         swizzle = SWIZZLE_XYZW;
      else
         swizzle = brw_get_texture_swizzle(tObj);

      surf->ss7.shader_channel_select_r = swizzle_to_scs(GET_SWZ(swizzle, 0));
      surf->ss7.shader_channel_select_g = swizzle_to_scs(GET_SWZ(swizzle, 1));
      surf->ss7.shader_channel_select_b = swizzle_to_scs(GET_SWZ(swizzle, 2));
      surf->ss7.shader_channel_select_a = swizzle_to_scs(GET_SWZ(swizzle, 3));
   }

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
                           *surf_offset +
                           offsetof(struct gen7_surface_state, ss1),
                           region->bo, mt->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);

   gen7_check_surface_setup(surf, false /* is_render_target */);
}
/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 *
 * The surface is a linear (untiled) buffer of R32G32B32A32_FLOAT elements
 * with a 16-byte stride.
 *
 * \param brw         context whose batch receives the state and relocation
 * \param bo          buffer holding the constants; must not be NULL
 * \param offset      byte offset of the constants within \p bo
 * \param width       size of the surface; (width - 1) is what gets encoded
 *                    across the width/height/depth fields
 * \param out_offset  receives the batch offset of the new surface state
 */
void
gen7_create_constant_surface(struct brw_context *brw,
                             drm_intel_bo *bo,
                             uint32_t offset,
                             int width,
                             uint32_t *out_offset)
{
   const GLint last_entry = width - 1;
   struct gen7_surface_state *surf =
      brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                      sizeof(*surf), 32, out_offset);

   memset(surf, 0, sizeof(*surf));

   surf->ss0.surface_type = BRW_SURFACE_BUFFER;
   surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
   surf->ss0.render_cache_read_write = 1;

   assert(bo);
   surf->ss1.base_addr = bo->offset + offset; /* reloc */

   /* The size is scattered over three fields. */
   surf->ss2.width = last_entry & 0x7f;            /* bits 6:0 */
   surf->ss2.height = (last_entry >> 7) & 0x1fff;  /* bits 19:7 */
   surf->ss3.depth = (last_entry >> 20) & 0x7f;    /* bits 26:20 */
   surf->ss3.pitch = (16 - 1);                     /* stride between samples */

   /* The constant buffer is programmed as a linear surface. */
   gen7_set_surface_tiling(surf, I915_TILING_NONE);

   /* Haswell gets an identity channel mapping. */
   if (brw->intel.is_haswell) {
      surf->ss7.shader_channel_select_r = HSW_SCS_RED;
      surf->ss7.shader_channel_select_g = HSW_SCS_GREEN;
      surf->ss7.shader_channel_select_b = HSW_SCS_BLUE;
      surf->ss7.shader_channel_select_a = HSW_SCS_ALPHA;
   }

   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
    * bspec ("Data Cache") says that the data cache does not exist as
    * a separate cache and is just the sampler cache.
    */
   const uint32_t reloc_location =
      *out_offset + offsetof(struct gen7_surface_state, ss1);

   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
                           reloc_location,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);

   gen7_check_surface_setup(surf, false /* is_render_target */);
}
/**
 * Emit a null render target surface for WM surface slot \p unit, sized to
 * match the current draw framebuffer.
 *
 * \param brw   context; the state offset is stored in brw->wm.surf_offset[unit]
 * \param unit  index into brw->wm.surf_offset
 */
static void
gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
{
   /* From the Ivy bridge PRM, Vol4 Part1 p62 (Surface Type: Programming
    * Notes):
    *
    *     A null surface is used in instances where an actual surface is not
    *     bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions: Width, Height,
    *     Depth, LOD, and Render Target View Extent fields must match the
    *     depth buffer's corresponding state for all render target surfaces,
    *     including null.
    */

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = brw->intel.ctx.DrawBuffer;
   uint32_t *surf_offset = &brw->wm.surf_offset[unit];

   struct gen7_surface_state *surf =
      brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                      sizeof(*surf), 32, surf_offset);
   memset(surf, 0, sizeof(*surf));

   surf->ss0.surface_type = BRW_SURFACE_NULL;
   surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;

   /* Dimensions are stored minus one and taken from the framebuffer. */
   surf->ss2.width = fb->Width - 1;
   surf->ss2.height = fb->Height - 1;

   /* From the Ivy bridge PRM, Vol4 Part1 p65 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE.
    */
   gen7_set_surface_tiling(surf, I915_TILING_Y);

   gen7_check_surface_setup(surf, true /* is_render_target */);
}
/**
 * Sets up a surface state structure to point at the region backing the
 * given renderbuffer.
 *
 * The state is allocated from the batch and its offset is stored in
 * brw->wm.surf_offset[unit].  A relocation with RENDER read and write
 * domains is emitted for the surface base address, and, for CMS-layout
 * miptrees, gen7_set_surface_mcs_info() programs the MCS buffer as well.
 *
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 *
 * \param brw   context whose batch receives the state and relocation
 * \param rb    renderbuffer to describe
 * \param unit  index into brw->wm.surf_offset
 */
static void
gen7_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
unsigned int unit)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb->mt->region;
struct gen7_surface_state *surf;
uint32_t tile_x, tile_y;
gl_format rb_format = intel_rb_format(irb);
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
/* Start from an all-zero state; only the fields below are filled in. */
memset(surf, 0, sizeof(*surf));
/* Render targets can't use IMS layout */
assert(irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
if (irb->mt->align_h == 4)
surf->ss0.vertical_alignment = 1;
if (irb->mt->align_w == 8)
surf->ss0.horizontal_alignment = 1;
switch (rb_format) {
case MESA_FORMAT_SARGB8:
/* _NEW_BUFFERS
*
* Without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the
* blend/update as sRGB.  When sRGB is not enabled the surface is bound
* with the B8G8R8A8_UNORM format instead.
*/
if (ctx->Color.sRGBEnabled)
surf->ss0.surface_format = brw_format_for_mesa_format(rb_format);
else
surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
default:
assert(brw_render_target_supported(intel, rb));
/* Look the hardware format up in the per-context render target table. */
surf->ss0.surface_format = brw->render_target_format[rb_format];
if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
__FUNCTION__, _mesa_get_format_name(rb_format));
}
break;
}
surf->ss0.surface_type = BRW_SURFACE_2D;
surf->ss0.surface_array_spacing = irb->mt->array_spacing_lod0 ?
GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL;
/* reloc: base_addr is first set to the value returned by
* intel_renderbuffer_tile_offsets() (which also fills in tile_x/tile_y),
* then the buffer's current offset is added on top.
*/
surf->ss1.base_addr = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
surf->ss1.base_addr += region->bo->offset; /* reloc */
assert(brw->has_surface_tile_offset);
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.  The x offset is stored
* divided by 4 and the y offset divided by 2, hence the alignment asserts.
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf->ss5.x_offset = tile_x / 4;
surf->ss5.y_offset = tile_y / 2;
/* Dimensions and pitch are stored minus one. */
surf->ss2.width = rb->Width - 1;
surf->ss2.height = rb->Height - 1;
gen7_set_surface_tiling(surf, region->tiling);
surf->ss3.pitch = (region->pitch * region->cpp) - 1;
gen7_set_surface_msaa(surf, irb->mt->num_samples, irb->mt->msaa_layout);
/* CMS layout additionally needs the MCS buffer programmed in dword 6. */
if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
gen7_set_surface_mcs_info(brw, surf, brw->wm.surf_offset[unit],
irb->mt->mcs_mt, true /* is_render_target */);
}
/* Haswell gets an identity channel mapping. */
if (intel->is_haswell) {
surf->ss7.shader_channel_select_r = HSW_SCS_RED;
surf->ss7.shader_channel_select_g = HSW_SCS_GREEN;
surf->ss7.shader_channel_select_b = HSW_SCS_BLUE;
surf->ss7.shader_channel_select_a = HSW_SCS_ALPHA;
}
/* The relocation delta is recovered by subtracting the buffer offset back
* out of the base address written above.
*/
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[unit] +
offsetof(struct gen7_surface_state, ss1),
region->bo,
surf->ss1.base_addr - region->bo->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
gen7_check_surface_setup(surf, true /* is_render_target */);
}
/**
 * Install the gen7 surface-state functions defined in this file into the
 * context's vtable.
 *
 * \param brw  context whose intel.vtbl entries are overwritten
 */
void
gen7_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->intel.vtbl.update_texture_surface = gen7_update_texture_surface;
   brw->intel.vtbl.update_renderbuffer_surface =
      gen7_update_renderbuffer_surface;
   brw->intel.vtbl.update_null_renderbuffer_surface =
      gen7_update_null_renderbuffer_surface;
   brw->intel.vtbl.create_constant_surface = gen7_create_constant_surface;
}