| /* |
| * Copyright © 2018 Collabora Ltd |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "st_tgsi_lower_depth_clamp.h" |
| #include "tgsi/tgsi_transform.h" |
| #include "tgsi/tgsi_scan.h" |
| |
| struct tgsi_depth_clamp_transform { |
| struct tgsi_transform_context base; |
| |
| struct tgsi_shader_info info; |
| |
| int depth_range_const; |
| int next_generic; |
| int imm; |
| int depth_var; |
| int pos_input; |
| int pos_output; |
| int pos_input_temp; |
| int pos_output_temp; |
| int depth_range_corrected; |
| bool depth_clip_minus_one_to_one; |
| }; |
| |
/* Convert the generic transform context handed to the callbacks back into
 * the depth-clamp context that embeds it as its first member.
 */
static inline struct tgsi_depth_clamp_transform *
tgsi_depth_clamp_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_depth_clamp_transform *derived =
      (struct tgsi_depth_clamp_transform *)tctx;

   return derived;
}
| |
| static void |
| transform_decl(struct tgsi_transform_context *tctx, |
| struct tgsi_full_declaration *decl) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| /* find the next generic index usable for our inserted varying */ |
| if (ctx->info.processor == PIPE_SHADER_FRAGMENT) { |
| if (decl->Declaration.File == TGSI_FILE_INPUT && |
| decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) |
| ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1); |
| } else { |
| if (decl->Declaration.File == TGSI_FILE_OUTPUT && |
| decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) |
| ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1); |
| } |
| |
| if (decl->Declaration.File == TGSI_FILE_OUTPUT && |
| decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { |
| assert(decl->Semantic.Index == 0); |
| ctx->pos_output = decl->Range.First; |
| } else if (decl->Declaration.File == TGSI_FILE_INPUT && |
| decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { |
| assert(decl->Semantic.Index == 0); |
| if (ctx->info.processor == PIPE_SHADER_FRAGMENT) |
| ctx->pos_input = decl->Range.First; |
| } |
| |
| tctx->emit_declaration(tctx, decl); |
| } |
| |
| static void |
| prolog_common(struct tgsi_depth_clamp_transform *ctx) |
| { |
| assert(ctx->depth_range_const >= 0); |
| if (ctx->info.const_file_max[0] < ctx->depth_range_const) |
| tgsi_transform_const_decl(&ctx->base, ctx->depth_range_const, |
| ctx->depth_range_const); |
| |
| /* declare a temp for the position-output */ |
| ctx->pos_output_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1; |
| tgsi_transform_temp_decl(&ctx->base, ctx->pos_output_temp); |
| } |
| |
| static void |
| prolog_last_vertex_stage(struct tgsi_transform_context *tctx) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| prolog_common(ctx); |
| |
| ctx->imm = ctx->info.immediate_count; |
| tgsi_transform_immediate_decl(tctx, 0.5, 0.0, 0.0, 0.0); |
| |
| /* declare the output */ |
| ctx->depth_var = ctx->info.num_outputs; |
| tgsi_transform_output_decl(tctx, ctx->depth_var, |
| TGSI_SEMANTIC_GENERIC, |
| ctx->next_generic, |
| TGSI_INTERPOLATE_LINEAR); |
| } |
| |
| static void |
| epilog_last_vertex_stage(struct tgsi_transform_context *tctx) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| int mad_dst_file = TGSI_FILE_TEMPORARY; |
| int mad_dst_index = ctx->pos_output_temp; |
| |
| if (!ctx->depth_clip_minus_one_to_one) { |
| mad_dst_file = TGSI_FILE_OUTPUT; |
| mad_dst_index = ctx->depth_var; |
| } |
| |
| /* move from temp-register to output */ |
| tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV, |
| TGSI_FILE_OUTPUT, ctx->pos_output, |
| TGSI_WRITEMASK_XYZW, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp); |
| |
| /* Set gl_position.z to 0.0 to avoid clipping */ |
| tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV, |
| TGSI_FILE_OUTPUT, ctx->pos_output, |
| TGSI_WRITEMASK_Z, |
| TGSI_FILE_IMMEDIATE, ctx->imm, |
| TGSI_SWIZZLE_Y); |
| |
| /* Evaluate and pass true depth value in depthRange terms */ |
| /* z = gl_Position.z / gl_Position.w */ |
| |
| struct tgsi_full_instruction inst; |
| |
| inst = tgsi_default_full_instruction(); |
| inst.Instruction.Opcode = TGSI_OPCODE_DIV; |
| inst.Instruction.NumDstRegs = 1; |
| inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; |
| inst.Dst[0].Register.Index = ctx->pos_output_temp; |
| inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; |
| inst.Instruction.NumSrcRegs = 2; |
| tgsi_transform_src_reg_xyzw(&inst.Src[0], TGSI_FILE_TEMPORARY, ctx->pos_output_temp); |
| tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_TEMPORARY, ctx->pos_output_temp); |
| inst.Src[0].Register.SwizzleX = |
| inst.Src[0].Register.SwizzleY = |
| inst.Src[0].Register.SwizzleZ = |
| inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z; |
| |
| inst.Src[1].Register.SwizzleX = |
| inst.Src[1].Register.SwizzleY = |
| inst.Src[1].Register.SwizzleZ = |
| inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; |
| |
| tctx->emit_instruction(tctx, &inst); |
| |
| |
| /* OpenGL Core Profile 4.5 - 13.6.1 |
| * The vertex's windows z coordinate zw is given by zw = s * z + b. |
| * |
| * * With clip control depth mode ZERO_TO_ONE |
| * s = f - n, b = n, and hence |
| * |
| * zw_0_1 = z * gl_DepthRange.diff + gl_DepthRange.near |
| */ |
| tgsi_transform_op3_swz_inst(tctx, TGSI_OPCODE_MAD, |
| mad_dst_file, mad_dst_index, |
| TGSI_WRITEMASK_X, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_SWIZZLE_X, |
| false, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_Z, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_X); |
| |
| /* If clip control depth mode is NEGATIVE_ONE_TO_ONE, then |
| * s = 0.5 * (f - n), b = 0.5 * (n + f), and hence |
| * |
| * zw_m1_1 = 0.5 * (zw_01 + gl_DepthRange.far) |
| */ |
| if (ctx->depth_clip_minus_one_to_one) { |
| /* z += gl_DepthRange.far */ |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_ADD, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_WRITEMASK_X, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_SWIZZLE_X, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_Y, false); |
| /* z *= 0.5 */ |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MUL, |
| TGSI_FILE_OUTPUT, ctx->depth_var, |
| TGSI_WRITEMASK_X, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_SWIZZLE_X, |
| TGSI_FILE_IMMEDIATE, ctx->imm, |
| TGSI_SWIZZLE_X, false); |
| } |
| } |
| |
| |
| static void |
| prolog_fs(struct tgsi_transform_context *tctx) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| prolog_common(ctx); |
| |
| ctx->depth_range_corrected = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 2; |
| tgsi_transform_temp_decl(tctx, ctx->depth_range_corrected); |
| |
| /* declare the input */ |
| ctx->depth_var = ctx->info.num_inputs; |
| tgsi_transform_input_decl(tctx, ctx->depth_var, |
| TGSI_SEMANTIC_GENERIC, |
| ctx->next_generic, |
| TGSI_INTERPOLATE_LINEAR); |
| |
| /* declare the output */ |
| if (ctx->pos_output < 0) { |
| ctx->pos_output = ctx->info.num_outputs; |
| tgsi_transform_output_decl(tctx, ctx->pos_output, |
| TGSI_SEMANTIC_POSITION, |
| 0, |
| TGSI_INTERPOLATE_LINEAR); |
| } |
| |
| if (ctx->info.reads_z) { |
| ctx->pos_input_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 3; |
| tgsi_transform_temp_decl(tctx, ctx->pos_input_temp); |
| |
| assert(ctx->pos_input_temp >= 0); |
| /* copy normal position */ |
| tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV, |
| TGSI_FILE_TEMPORARY, ctx->pos_input_temp, |
| TGSI_WRITEMASK_XYZW, |
| TGSI_FILE_INPUT, ctx->pos_input); |
| /* replace z-component with varying */ |
| tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV, |
| TGSI_FILE_TEMPORARY, ctx->pos_input_temp, |
| TGSI_WRITEMASK_Z, |
| TGSI_FILE_INPUT, ctx->depth_var, |
| TGSI_SWIZZLE_X); |
| } |
| } |
| |
| static void |
| epilog_fs(struct tgsi_transform_context *tctx) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| unsigned src0_file = TGSI_FILE_INPUT; |
| unsigned src0_index = ctx->depth_var; |
| unsigned src0_swizzle = TGSI_SWIZZLE_X; |
| |
| if (ctx->info.writes_z) { |
| src0_file = TGSI_FILE_TEMPORARY; |
| src0_index = ctx->pos_output_temp; |
| src0_swizzle = TGSI_SWIZZLE_Z; |
| } |
| |
| /* it is possible to have gl_DepthRange.near > gl_DepthRange.far, so first |
| * we have to sort the two */ |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN, |
| TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, |
| TGSI_WRITEMASK_X, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_X, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_Y, |
| false); |
| |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX, |
| TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, |
| TGSI_WRITEMASK_Y, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_X, |
| TGSI_FILE_CONSTANT, ctx->depth_range_const, |
| TGSI_SWIZZLE_Y, |
| false); |
| |
| /* gl_FragDepth = max(gl_FragDepth, min(gl_DepthRange.near, gl_DepthRange.far)) */ |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_WRITEMASK_X, |
| src0_file, src0_index, src0_swizzle, |
| TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, |
| TGSI_SWIZZLE_X, false); |
| |
| /* gl_FragDepth = min(gl_FragDepth, max(gl_DepthRange.near, gl_DepthRange.far)) */ |
| tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN, |
| TGSI_FILE_OUTPUT, ctx->pos_output, |
| TGSI_WRITEMASK_Z, |
| TGSI_FILE_TEMPORARY, ctx->pos_output_temp, |
| TGSI_SWIZZLE_X, |
| TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, |
| TGSI_SWIZZLE_Y, false); |
| } |
| |
| static void |
| transform_instr(struct tgsi_transform_context *tctx, |
| struct tgsi_full_instruction *inst) |
| { |
| struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); |
| |
| if (ctx->pos_output >= 0) { |
| /* replace writes to gl_Position / gl_FragDepth with a temp-variable |
| */ |
| for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { |
| if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT && |
| inst->Dst[i].Register.Index == ctx->pos_output) { |
| inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY; |
| inst->Dst[i].Register.Index = ctx->pos_output_temp; |
| } |
| } |
| } |
| |
| if (ctx->info.reads_z) { |
| /* replace reads from gl_FragCoord with temp-variable |
| */ |
| assert(ctx->pos_input_temp >= 0); |
| for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { |
| if (inst->Src[i].Register.File == TGSI_FILE_INPUT && |
| inst->Src[i].Register.Index == ctx->pos_input) { |
| inst->Src[i].Register.File = TGSI_FILE_TEMPORARY; |
| inst->Src[i].Register.Index = ctx->pos_input_temp; |
| } |
| } |
| } |
| |
| /* In a GS each we have to add the z-write opilog for each emit |
| */ |
| if (ctx->info.processor == PIPE_SHADER_GEOMETRY && |
| inst->Instruction.Opcode == TGSI_OPCODE_EMIT) |
| epilog_last_vertex_stage(tctx); |
| |
| tctx->emit_instruction(tctx, inst); |
| } |
| |
| const struct tgsi_token * |
| st_tgsi_lower_depth_clamp(const struct tgsi_token *tokens, |
| int depth_range_const, |
| bool clip_negative_one_to_one) |
| { |
| struct tgsi_depth_clamp_transform ctx; |
| struct tgsi_token *newtoks; |
| int newlen; |
| |
| memset(&ctx, 0, sizeof(ctx)); |
| tgsi_scan_shader(tokens, &ctx.info); |
| |
| /* we only want to do this for the fragment shader, and the shader-stage |
| * right before it, but in the first pass there might be no "next" shader |
| */ |
| if (ctx.info.processor != PIPE_SHADER_FRAGMENT && |
| ctx.info.processor != PIPE_SHADER_GEOMETRY && |
| ctx.info.processor != PIPE_SHADER_VERTEX && |
| ctx.info.processor != PIPE_SHADER_TESS_EVAL && |
| (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] > PIPE_SHADER_VERTEX && |
| (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] != PIPE_SHADER_FRAGMENT))) { |
| return tokens; |
| } |
| |
| ctx.base.transform_declaration = transform_decl; |
| ctx.base.transform_instruction = transform_instr; |
| |
| if (ctx.info.processor == PIPE_SHADER_FRAGMENT) { |
| ctx.base.prolog = prolog_fs; |
| ctx.base.epilog = epilog_fs; |
| } else { |
| ctx.base.prolog = prolog_last_vertex_stage; |
| ctx.base.epilog = epilog_last_vertex_stage; |
| } |
| |
| ctx.pos_output = ctx.pos_input = -1; |
| ctx.depth_range_const = depth_range_const; |
| ctx.depth_clip_minus_one_to_one = clip_negative_one_to_one; |
| |
| /* We add approximately 30 tokens per Z write, so add this per vertex in |
| * a GS and some additional tokes for VS and TES |
| */ |
| newlen = tgsi_num_tokens(tokens) + |
| 30 * ctx.info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + |
| 120; |
| |
| newtoks = tgsi_alloc_tokens(newlen); |
| if (!newtoks) |
| return tokens; |
| |
| tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); |
| |
| return newtoks; |
| } |
| |
/**
 * Convenience wrapper around st_tgsi_lower_depth_clamp() that passes false
 * for the clip-control flag; judging by the name it is meant for fragment
 * shaders.
 *
 * \param tokens             shader to transform
 * \param depth_range_const  index in the constant file that holds the
 *                           depth range
 *
 * \return whatever st_tgsi_lower_depth_clamp() returns for these arguments
 */
const struct tgsi_token *
st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens,
                             int depth_range_const)
{
   const bool clip_negative_one_to_one = false;

   return st_tgsi_lower_depth_clamp(tokens, depth_range_const,
                                    clip_negative_one_to_one);
}