blob: 69b4b41791287e7186a42b4deb88ed9c8ee5884b [file] [log] [blame]
/*
* Copyright (C) 2020 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <math.h>
#include <stdio.h>
#include "pan_encoder.h"
#include "pan_pool.h"
#include "pan_scoreboard.h"
#include "pan_texture.h"
#include "panfrost-quirks.h"
#include "../midgard/midgard_compile.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
* missing in many cases. We instead use software paths as fallbacks to
* implement blits, which are done as TILER jobs. No vertex shader is
* necessary since we can supply screen-space coordinates directly.
*
* This is primarily designed as a fallback for preloads but could be extended
* for other clears/blits if needed in the future. */
static void
panfrost_build_blit_shader(panfrost_program *program, unsigned gpu_id, gl_frag_result loc, nir_alu_type T, bool ms)
{
bool is_colour = loc >= FRAG_RESULT_DATA0;
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
nir_function *fn = nir_function_create(shader, "main");
nir_function_impl *impl = nir_function_impl_create(fn);
nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "coord");
nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(
GLSL_TYPE_FLOAT, is_colour ? 4 : 1), "out");
c_src->data.location = VARYING_SLOT_TEX0;
c_out->data.location = loc;
nir_builder _b;
nir_builder *b = &_b;
nir_builder_init(b, impl);
b->cursor = nir_before_block(nir_start_block(impl));
nir_ssa_def *coord = nir_load_var(b, c_src);
nir_tex_instr *tex = nir_tex_instr_create(shader, ms ? 3 : 1);
tex->dest_type = T;
if (ms) {
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(nir_f2i32(b, coord));
tex->coord_components = 2;
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
tex->src[2].src_type = nir_tex_src_lod;
tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
} else {
tex->op = nir_texop_tex;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(coord);
tex->coord_components = 2;
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
}
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
if (is_colour)
nir_store_var(b, c_out, &tex->dest.ssa, 0xFF);
else
nir_store_var(b, c_out, nir_channel(b, &tex->dest.ssa, 0), 0xFF);
midgard_compile_shader_nir(shader, program, false, 0, gpu_id, false, true);
}
/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
* time overhead. There's only ~30 of them at the moment, so this is fine */
void
panfrost_init_blit_shaders(struct panfrost_device *dev)
{
static const struct {
gl_frag_result loc;
unsigned types;
} shader_descs[] = {
{ FRAG_RESULT_DEPTH, 1 << PAN_BLIT_FLOAT },
{ FRAG_RESULT_STENCIL, 1 << PAN_BLIT_UINT },
{ FRAG_RESULT_DATA0, ~0 },
{ FRAG_RESULT_DATA1, ~0 },
{ FRAG_RESULT_DATA2, ~0 },
{ FRAG_RESULT_DATA3, ~0 },
{ FRAG_RESULT_DATA4, ~0 },
{ FRAG_RESULT_DATA5, ~0 },
{ FRAG_RESULT_DATA6, ~0 },
{ FRAG_RESULT_DATA7, ~0 }
};
nir_alu_type nir_types[PAN_BLIT_NUM_TYPES] = {
nir_type_float,
nir_type_uint,
nir_type_int
};
/* Total size = # of shaders * bytes per shader. There are
* shaders for each RT (so up to DATA7 -- overestimate is
* okay) and up to NUM_TYPES variants of each, * 2 for multisampling
* variants. These shaders are simple enough that they should be less
* than 8 quadwords each (again, overestimate is fine). */
unsigned offset = 0;
unsigned total_size = (FRAG_RESULT_DATA7 * PAN_BLIT_NUM_TYPES)
* (8 * 16) * 2;
dev->blit_shaders.bo = panfrost_bo_create(dev, total_size, PAN_BO_EXECUTE);
/* Don't bother generating multisampling variants if we don't actually
* support multisampling */
bool has_ms = !(dev->quirks & MIDGARD_SFBD);
for (unsigned ms = 0; ms <= has_ms; ++ms) {
for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
unsigned loc = shader_descs[i].loc;
for (enum pan_blit_type T = 0; T < PAN_BLIT_NUM_TYPES; ++T) {
if (!(shader_descs[i].types & (1 << T)))
continue;
panfrost_program program;
panfrost_build_blit_shader(&program, dev->gpu_id, loc,
nir_types[T], ms);
assert(offset + program.compiled.size < total_size);
memcpy(dev->blit_shaders.bo->cpu + offset, program.compiled.data, program.compiled.size);
dev->blit_shaders.loads[loc][T][ms] = (dev->blit_shaders.bo->gpu + offset) | program.first_tag;
offset += ALIGN_POT(program.compiled.size, 64);
util_dynarray_fini(&program.compiled);
}
}
}
}
/* Add a shader-based load on Midgard (draw-time for GL). Shaders are
* precached */
void
panfrost_load_midg(
struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
mali_ptr blend_shader,
mali_ptr fbd,
mali_ptr coordinates, unsigned vertex_count,
struct pan_image *image,
unsigned loc)
{
unsigned width = u_minify(image->width0, image->first_level);
unsigned height = u_minify(image->height0, image->first_level);
struct panfrost_transfer viewport = panfrost_pool_alloc(pool, MALI_VIEWPORT_LENGTH);
pan_pack(viewport.cpu, VIEWPORT, cfg) {
cfg.scissor_maximum_x = width - 1; /* Inclusive */
cfg.scissor_maximum_y = height - 1;
}
union mali_attr varying = {
.elements = coordinates | MALI_ATTR_LINEAR,
.stride = 4 * sizeof(float),
.size = 4 * sizeof(float) * vertex_count,
};
struct mali_attr_meta varying_meta = {
.index = 0,
.unknown1 = 2,
.swizzle = (MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3),
.format = MALI_RGBA32F
};
struct mali_stencil_packed stencil;
pan_pack(&stencil, STENCIL, cfg) {
cfg.compare_function = MALI_FUNC_ALWAYS;
cfg.stencil_fail = MALI_STENCIL_OP_REPLACE;
cfg.depth_fail = MALI_STENCIL_OP_REPLACE;
cfg.depth_pass = MALI_STENCIL_OP_REPLACE;
};
union midgard_blend replace = {
.equation = {
.rgb_mode = 0x122,
.alpha_mode = 0x122,
.color_mask = MALI_MASK_R | MALI_MASK_G | MALI_MASK_B | MALI_MASK_A,
}
};
if (blend_shader)
replace.shader = blend_shader;
/* Determine the sampler type needed. Stencil is always sampled as
* UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
enum pan_blit_type T =
(loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
(util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
(util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
PAN_BLIT_FLOAT;
bool ms = image->nr_samples > 1;
struct mali_shader_meta shader_meta = {
.shader = pool->dev->blit_shaders.loads[loc][T][ms],
.sampler_count = 1,
.texture_count = 1,
.varying_count = 1,
.midgard1 = {
.flags_lo = 0x20,
.work_count = 4,
},
.coverage_mask = ~0,
.unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10,
.unknown2_4 = 0x4e0,
.stencil_mask_front = ~0,
.stencil_mask_back = ~0,
.stencil_front = stencil,
.stencil_back = stencil,
.blend = {
.shader = blend_shader
}
};
if (ms)
shader_meta.unknown2_3 |= MALI_HAS_MSAA | MALI_PER_SAMPLE;
else
shader_meta.unknown2_4 |= MALI_NO_MSAA;
assert(shader_meta.shader);
if (pool->dev->quirks & MIDGARD_SFBD) {
shader_meta.unknown2_4 |= (0x10 | MALI_NO_DITHER);
shader_meta.blend = replace;
if (loc < FRAG_RESULT_DATA0)
shader_meta.blend.equation.color_mask = 0x0;
}
if (loc == FRAG_RESULT_DEPTH) {
shader_meta.midgard1.flags_lo |= MALI_WRITES_Z;
shader_meta.unknown2_3 |= MALI_DEPTH_WRITEMASK;
} else if (loc == FRAG_RESULT_STENCIL) {
shader_meta.midgard1.flags_hi |= MALI_WRITES_S;
shader_meta.unknown2_4 |= MALI_STENCIL_TEST;
} else {
shader_meta.midgard1.flags_lo |= MALI_EARLY_Z;
}
/* Create the texture descriptor. We partially compute the base address
* ourselves to account for layer, such that the texture descriptor
* itself is for a 2D texture with array size 1 even for 3D/array
* textures, removing the need to separately key the blit shaders for
* 2D and 3D variants */
struct panfrost_transfer texture_t = panfrost_pool_alloc(pool, sizeof(struct mali_texture_descriptor) + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1));
panfrost_new_texture(texture_t.cpu,
image->width0, image->height0,
MAX2(image->nr_samples, 1), 1,
image->format, MALI_TEX_2D,
image->modifier,
image->first_level, image->last_level,
0, 0,
image->nr_samples,
0,
(MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3) | (MALI_CHANNEL_BLUE << 6) | (MALI_CHANNEL_ALPHA << 9),
image->bo->gpu + image->first_layer *
panfrost_get_layer_stride(image->slices,
image->type == MALI_TEX_3D,
image->cubemap_stride, image->first_level),
image->slices);
struct mali_sampler_descriptor sampler = {
.filter_mode = MALI_SAMP_MAG_NEAREST | MALI_SAMP_MIN_NEAREST,
.wrap_s = MALI_WRAP_MODE_CLAMP_TO_EDGE,
.wrap_t = MALI_WRAP_MODE_CLAMP_TO_EDGE,
.wrap_r = MALI_WRAP_MODE_CLAMP_TO_EDGE,
};
struct panfrost_transfer shader_meta_t = panfrost_pool_alloc(pool, sizeof(shader_meta) + 8 * sizeof(struct midgard_blend_rt));
memcpy(shader_meta_t.cpu, &shader_meta, sizeof(shader_meta));
for (unsigned i = 0; i < 8; ++i) {
void *dest = shader_meta_t.cpu + sizeof(shader_meta) + sizeof(struct midgard_blend_rt) * i;
if (loc == (FRAG_RESULT_DATA0 + i)) {
struct midgard_blend_rt blend_rt = {
.flags = 0x200 | MALI_BLEND_NO_DITHER,
.blend = replace,
};
if (util_format_is_srgb(image->format))
blend_rt.flags |= MALI_BLEND_SRGB;
if (blend_shader) {
blend_rt.flags |= MALI_BLEND_MRT_SHADER;
blend_rt.blend.shader = blend_shader;
}
memcpy(dest, &blend_rt, sizeof(struct midgard_blend_rt));
} else {
memset(dest, 0x0, sizeof(struct midgard_blend_rt));
}
}
struct midgard_payload_vertex_tiler payload = {
.prefix = {
.draw_mode = MALI_DRAW_MODE_TRIANGLES,
.unknown_draw = 0x3000,
.index_count = MALI_POSITIVE(vertex_count)
},
.postfix = {
.gl_enables = 0x7,
.position_varying = coordinates,
.textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu)),
.sampler_descriptor = panfrost_pool_upload(pool, &sampler, sizeof(sampler)),
.shader = shader_meta_t.gpu,
.varyings = panfrost_pool_upload(pool, &varying, sizeof(varying)),
.varying_meta = panfrost_pool_upload(pool, &varying_meta, sizeof(varying_meta)),
.viewport = viewport.gpu,
.shared_memory = fbd
}
};
panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
payload.prefix.workgroups_x_shift_3 = 6;
panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
}