| /* |
| * Copyright © 2016 Red Hat |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <stdbool.h> |
| |
| #include "st_tgsi_lower_yuv.h" |
| #include "tgsi/tgsi_transform.h" |
| #include "tgsi/tgsi_scan.h" |
| #include "tgsi/tgsi_dump.h" |
| #include "util/u_debug.h" |
| |
| #include "util/bitscan.h" |
| |
| struct tgsi_yuv_transform { |
| struct tgsi_transform_context base; |
| struct tgsi_shader_info info; |
| struct tgsi_full_src_register imm[4]; |
| struct { |
| struct tgsi_full_src_register src; |
| struct tgsi_full_dst_register dst; |
| } tmp[2]; |
| #define A 0 |
| #define B 1 |
| |
| /* Maps a primary sampler (used for Y) to the U or UV sampler. In |
| * case of 3-plane YUV format, the V plane is next sampler after U. |
| */ |
| unsigned char sampler_map[PIPE_MAX_SAMPLERS][2]; |
| |
| bool first_instruction_emitted; |
| unsigned free_slots; |
| unsigned lower_nv12; |
| unsigned lower_iyuv; |
| }; |
| |
/* Downcast a generic transform context to the YUV lowering context that
 * embeds it as its first member.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
| |
| static void |
| reg_dst(struct tgsi_full_dst_register *dst, |
| const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) |
| { |
| *dst = *orig_dst; |
| dst->Register.WriteMask &= wrmask; |
| assert(dst->Register.WriteMask); |
| } |
| |
| static inline void |
| get_swiz(unsigned *swiz, const struct tgsi_src_register *src) |
| { |
| swiz[0] = src->SwizzleX; |
| swiz[1] = src->SwizzleY; |
| swiz[2] = src->SwizzleZ; |
| swiz[3] = src->SwizzleW; |
| } |
| |
| static void |
| reg_src(struct tgsi_full_src_register *src, |
| const struct tgsi_full_src_register *orig_src, |
| unsigned sx, unsigned sy, unsigned sz, unsigned sw) |
| { |
| unsigned swiz[4]; |
| get_swiz(swiz, &orig_src->Register); |
| *src = *orig_src; |
| src->Register.SwizzleX = swiz[sx]; |
| src->Register.SwizzleY = swiz[sy]; |
| src->Register.SwizzleZ = swiz[sz]; |
| src->Register.SwizzleW = swiz[sw]; |
| } |
| |
/* '_' is a placeholder for a don't-care channel; it aliases X only so
 * that the token pasting in SWIZ() produces a valid constant.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expand four channel letters (X, Y, Z, W or _) into the four swizzle
 * arguments taken by reg_src().
 */
#define SWIZ(c0, c1, c2, c3) \
   TGSI_SWIZZLE_ ## c0, TGSI_SWIZZLE_ ## c1, \
   TGSI_SWIZZLE_ ## c2, TGSI_SWIZZLE_ ## c3
| |
| static inline struct tgsi_full_instruction |
| tex_instruction(unsigned samp) |
| { |
| struct tgsi_full_instruction inst; |
| |
| inst = tgsi_default_full_instruction(); |
| inst.Instruction.Opcode = TGSI_OPCODE_TEX; |
| inst.Instruction.Texture = 1; |
| inst.Texture.Texture = TGSI_TEXTURE_2D; |
| inst.Instruction.NumDstRegs = 1; |
| inst.Instruction.NumSrcRegs = 2; |
| inst.Src[1].Register.File = TGSI_FILE_SAMPLER; |
| inst.Src[1].Register.Index = samp; |
| |
| return inst; |
| } |
| |
| static inline struct tgsi_full_instruction |
| mov_instruction(void) |
| { |
| struct tgsi_full_instruction inst; |
| |
| inst = tgsi_default_full_instruction(); |
| inst.Instruction.Opcode = TGSI_OPCODE_MOV; |
| inst.Instruction.Saturate = 0; |
| inst.Instruction.NumDstRegs = 1; |
| inst.Instruction.NumSrcRegs = 1; |
| |
| return inst; |
| } |
| |
| static inline struct tgsi_full_instruction |
| dp3_instruction(void) |
| { |
| struct tgsi_full_instruction inst; |
| |
| inst = tgsi_default_full_instruction(); |
| inst.Instruction.Opcode = TGSI_OPCODE_DP3; |
| inst.Instruction.NumDstRegs = 1; |
| inst.Instruction.NumSrcRegs = 2; |
| |
| return inst; |
| } |
| |
| |
| |
| static void |
| emit_immed(struct tgsi_transform_context *tctx, int idx, |
| float x, float y, float z, float w) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| struct tgsi_shader_info *info = &ctx->info; |
| struct tgsi_full_immediate immed; |
| |
| immed = tgsi_default_full_immediate(); |
| immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ |
| immed.u[0].Float = x; |
| immed.u[1].Float = y; |
| immed.u[2].Float = z; |
| immed.u[3].Float = w; |
| tctx->emit_immediate(tctx, &immed); |
| |
| ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE; |
| ctx->imm[idx].Register.Index = info->immediate_count + idx; |
| ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X; |
| ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y; |
| ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z; |
| ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W; |
| } |
| |
| static void |
| emit_samp(struct tgsi_transform_context *tctx, unsigned samp) |
| { |
| tgsi_transform_sampler_decl(tctx, samp); |
| tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D, |
| TGSI_RETURN_TYPE_FLOAT); |
| } |
| |
| /* Emit extra declarations we need: |
| * + 2 TEMP to hold intermediate results |
| * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per |
| * lowered YUV sampler |
| * + extra immediates for doing CSC |
| */ |
| static void |
| emit_decls(struct tgsi_transform_context *tctx) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| struct tgsi_shader_info *info = &ctx->info; |
| unsigned mask, tempbase, i; |
| struct tgsi_full_declaration decl; |
| |
| /* |
| * Declare immediates for CSC conversion: |
| */ |
| |
| /* ITU-R BT.601 conversion */ |
| emit_immed(tctx, 0, 1.164, 0.000, 1.596, 0.0); |
| emit_immed(tctx, 1, 1.164, -0.392, -0.813, 0.0); |
| emit_immed(tctx, 2, 1.164, 2.017, 0.000, 0.0); |
| emit_immed(tctx, 3, 0.0625, 0.500, 0.500, 1.0); |
| |
| /* |
| * Declare extra samplers / sampler-views: |
| */ |
| |
| mask = ctx->lower_nv12 | ctx->lower_iyuv; |
| while (mask) { |
| unsigned extra, y_samp = u_bit_scan(&mask); |
| |
| extra = u_bit_scan(&ctx->free_slots); |
| ctx->sampler_map[y_samp][0] = extra; |
| emit_samp(tctx, extra); |
| |
| if (ctx->lower_iyuv & (1 << y_samp)) { |
| extra = u_bit_scan(&ctx->free_slots); |
| ctx->sampler_map[y_samp][1] = extra; |
| emit_samp(tctx, extra); |
| } |
| } |
| |
| /* |
| * Declare extra temp: |
| */ |
| |
| tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; |
| |
| for (i = 0; i < 2; i++) { |
| decl = tgsi_default_full_declaration(); |
| decl.Declaration.File = TGSI_FILE_TEMPORARY; |
| decl.Range.First = decl.Range.Last = tempbase + i; |
| tctx->emit_declaration(tctx, &decl); |
| |
| ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; |
| ctx->tmp[i].src.Register.Index = tempbase + i; |
| ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; |
| ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; |
| ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; |
| ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; |
| |
| ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; |
| ctx->tmp[i].dst.Register.Index = tempbase + i; |
| ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; |
| } |
| } |
| |
| /* call with YUV in tmpA.xyz */ |
| static void |
| yuv_to_rgb(struct tgsi_transform_context *tctx, |
| struct tgsi_full_dst_register *dst) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| struct tgsi_full_instruction inst; |
| |
| /* |
| * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 } |
| * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 } |
| * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 } |
| * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 } |
| */ |
| |
| /* SUB tmpA.xyz, tmpA, imm[3] */ |
| inst = tgsi_default_full_instruction(); |
| inst.Instruction.Opcode = TGSI_OPCODE_SUB; |
| inst.Instruction.Saturate = 0; |
| inst.Instruction.NumDstRegs = 1; |
| inst.Instruction.NumSrcRegs = 2; |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); |
| reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); |
| reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* DP3 dst.x, tmpA, imm[0] */ |
| inst = dp3_instruction(); |
| reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); |
| reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); |
| reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* DP3 dst.y, tmpA, imm[1] */ |
| inst = dp3_instruction(); |
| reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); |
| reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); |
| reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* DP3 dst.z, tmpA, imm[2] */ |
| inst = dp3_instruction(); |
| reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); |
| reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); |
| reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* MOV dst.w, imm[0].x */ |
| inst = mov_instruction(); |
| reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); |
| reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); |
| tctx->emit_instruction(tctx, &inst); |
| } |
| |
| static void |
| lower_nv12(struct tgsi_transform_context *tctx, |
| struct tgsi_full_instruction *originst) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| struct tgsi_full_instruction inst; |
| struct tgsi_full_src_register *coord = &originst->Src[0]; |
| unsigned samp = originst->Src[1].Register.Index; |
| |
| /* sample Y: |
| * TEX tempA.x, coord, texture[samp], 2D; |
| */ |
| inst = tex_instruction(samp); |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); |
| reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* sample UV: |
| * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D; |
| * MOV tempA.yz, tempB._xy_ |
| */ |
| inst = tex_instruction(ctx->sampler_map[samp][0]); |
| reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY); |
| reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| inst = mov_instruction(); |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ); |
| reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* At this point, we have YUV in tempA.xyz, rest is common: */ |
| yuv_to_rgb(tctx, &originst->Dst[0]); |
| } |
| |
| static void |
| lower_iyuv(struct tgsi_transform_context *tctx, |
| struct tgsi_full_instruction *originst) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| struct tgsi_full_instruction inst; |
| struct tgsi_full_src_register *coord = &originst->Src[0]; |
| unsigned samp = originst->Src[1].Register.Index; |
| |
| /* sample Y: |
| * TEX tempA.x, coord, texture[samp], 2D; |
| */ |
| inst = tex_instruction(samp); |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); |
| reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* sample U: |
| * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D; |
| * MOV tempA.y, tempB._x__ |
| */ |
| inst = tex_instruction(ctx->sampler_map[samp][0]); |
| reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); |
| reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| inst = mov_instruction(); |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); |
| reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* sample V: |
| * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D; |
| * MOV tempA.z, tempB.__x_ |
| */ |
| inst = tex_instruction(ctx->sampler_map[samp][1]); |
| reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); |
| reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| inst = mov_instruction(); |
| reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); |
| reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _)); |
| tctx->emit_instruction(tctx, &inst); |
| |
| /* At this point, we have YUV in tempA.xyz, rest is common: */ |
| yuv_to_rgb(tctx, &originst->Dst[0]); |
| } |
| |
| static void |
| transform_instr(struct tgsi_transform_context *tctx, |
| struct tgsi_full_instruction *inst) |
| { |
| struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); |
| |
| if (!ctx->first_instruction_emitted) { |
| emit_decls(tctx); |
| ctx->first_instruction_emitted = true; |
| } |
| |
| switch (inst->Instruction.Opcode) { |
| /* TODO what other tex opcode's can be used w/ external eglimgs? */ |
| case TGSI_OPCODE_TEX: { |
| unsigned samp = inst->Src[1].Register.Index; |
| if (ctx->lower_nv12 & (1 << samp)) { |
| lower_nv12(tctx, inst); |
| } else if (ctx->lower_iyuv & (1 << samp)) { |
| lower_iyuv(tctx, inst); |
| } else { |
| goto skip; |
| } |
| break; |
| } |
| default: |
| skip: |
| tctx->emit_instruction(tctx, inst); |
| return; |
| } |
| } |
| |
| extern const struct tgsi_token * |
| st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots, |
| unsigned lower_nv12, unsigned lower_iyuv) |
| { |
| struct tgsi_yuv_transform ctx; |
| struct tgsi_token *newtoks; |
| int newlen; |
| |
| assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */ |
| |
| // tgsi_dump(tokens, 0); |
| // debug_printf("\n"); |
| |
| memset(&ctx, 0, sizeof(ctx)); |
| ctx.base.transform_instruction = transform_instr; |
| ctx.free_slots = free_slots; |
| ctx.lower_nv12 = lower_nv12; |
| ctx.lower_iyuv = lower_iyuv; |
| tgsi_scan_shader(tokens, &ctx.info); |
| |
| /* TODO better job of figuring out how many extra tokens we need.. |
| * this is a pain about tgsi_transform :-/ |
| */ |
| newlen = tgsi_num_tokens(tokens) + 120; |
| newtoks = tgsi_alloc_tokens(newlen); |
| if (!newtoks) |
| return NULL; |
| |
| tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); |
| |
| // tgsi_dump(newtoks, 0); |
| // debug_printf("\n"); |
| |
| return newtoks; |
| } |