blob: c6050bdf4ddc10ff9683d4ff865514a3f406bff0 [file] [log] [blame]
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program_pair.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
/**
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction
* and reverse the order of arguments for CMP.
*/
static void final_rewrite(struct rc_sub_instruction *inst)
{
struct rc_src_register tmp;
switch(inst->Opcode) {
case RC_OPCODE_ADD:
inst->SrcReg[2] = inst->SrcReg[1];
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[1].Negate = RC_MASK_NONE;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_CMP:
tmp = inst->SrcReg[2];
inst->SrcReg[2] = inst->SrcReg[0];
inst->SrcReg[0] = tmp;
break;
case RC_OPCODE_MOV:
/* AMD say we should use CMP.
* However, when we transform
* KIL -r0;
* into
* CMP tmp, -r0, -r0, 0;
* KIL tmp;
* we get incorrect behaviour on R500 when r0 == 0.0.
* It appears that the R500 KIL hardware treats -0.0 as less
* than zero.
*/
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_MUL:
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
default:
/* nothing to do */
break;
}
}
/**
 * Classify an instruction according to which ALUs etc. it needs.
 *
 * All three outputs are always written, each with 0 or 1:
 *   needrgb        - the instruction occupies the RGB half
 *   needalpha      - the instruction occupies the alpha half
 *   istranscendent - the opcode is one of COS/EX2/LG2/RCP/RSQ/SIN
 *
 * The starting point is the destination write mask; the ALU-result
 * write and the opcode can then only add requirements, never remove them.
 */
static void classify_instruction(struct rc_sub_instruction * inst,
	int * needrgb, int * needalpha, int * istranscendent)
{
	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) != 0;
	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) != 0;
	*istranscendent = 0;

	/* Writing the ALU result from X or W claims the matching half
	 * even when the write mask alone would not. */
	if (inst->WriteALUResult == RC_ALURESULT_X) {
		*needrgb = 1;
	} else if (inst->WriteALUResult == RC_ALURESULT_W) {
		*needalpha = 1;
	}

	switch (inst->Opcode) {
	/* Transcendentals are flagged and always claim alpha. */
	case RC_OPCODE_COS:
	case RC_OPCODE_EX2:
	case RC_OPCODE_LG2:
	case RC_OPCODE_RCP:
	case RC_OPCODE_RSQ:
	case RC_OPCODE_SIN:
		*istranscendent = 1;
		*needalpha = 1;
		break;

	/* Dot products claim RGB; the four-component one claims alpha too. */
	case RC_OPCODE_DP3:
		*needrgb = 1;
		break;
	case RC_OPCODE_DP4:
		*needalpha = 1;
		*needrgb = 1;
		break;

	/* These opcodes add nothing beyond the classification above. */
	case RC_OPCODE_ADD:
	case RC_OPCODE_CMP:
	case RC_OPCODE_CND:
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
	case RC_OPCODE_FRC:
	case RC_OPCODE_MAD:
	case RC_OPCODE_MAX:
	case RC_OPCODE_MIN:
	case RC_OPCODE_MOV:
	case RC_OPCODE_MUL:
	default:
		break;
	}
}
/**
 * Record which channel groups the four swizzle selectors of a source read.
 *
 * Selector values 0..2 set *rgb and value 3 sets *alpha; any other value
 * sets neither. The outputs are only ever set to 1, never cleared, so the
 * caller must zero them beforehand.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
	unsigned int * alpha)
{
	int chan = 0;

	while (chan < 4) {
		const unsigned int sel = GET_SWZ(src.Swizzle, chan);

		switch (sel) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			/* selector reads neither channel group */
			break;
		}
		++chan;
	}
}
/**
 * Fill the given ALU instruction's opcodes and source operands into the given
 * pair, if possible.
 *
 * The pair is zeroed first, so none of its previous contents survive.
 * The sub-instruction is only read. When rc_pair_alloc_source() cannot
 * place a source, an error is reported through rc_error() and the function
 * returns immediately, leaving the pair partially filled.
 *
 * The work happens in four steps:
 *  1. pick the RGB / alpha opcodes and saturate flags,
 *  2. pre-seed source slots for presubtract inputs,
 *  3. allocate sources and fill per-argument swizzle / abs / negate,
 *  4. fill destination, output modifier and ALU-result fields.
 */
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
	struct rc_pair_instruction * pair,
	struct rc_sub_instruction * inst)
{
	int needrgb, needalpha, istranscendent;
	const struct rc_opcode_info * opcode;
	int i;

	memset(pair, 0, sizeof(struct rc_pair_instruction));

	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);

	if (needrgb) {
		/* For a transcendent opcode the RGB half does not carry the
		 * opcode itself but RC_OPCODE_REPL_ALPHA. */
		if (istranscendent)
			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
		else
			pair->RGB.Opcode = inst->Opcode;
		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
			pair->RGB.Saturate = 1;
	}
	if (needalpha) {
		pair->Alpha.Opcode = inst->Opcode;
		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
			pair->Alpha.Saturate = 1;
	}

	opcode = rc_get_opcode_info(inst->Opcode);

	/* Presubtract handling:
	 * We need to make sure that the values used by the presubtract
	 * operation end up in src0 or src1. */
	if (inst->PreSub.Opcode != RC_PRESUB_NONE) {
		/* rc_pair_alloc_source() will fill in data for
		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
		int j;
		for (j = 0; j < 3; j++) {
			int src_regs;
			unsigned int rgb = 0;
			unsigned int alpha = 0;

			if (inst->SrcReg[j].File != RC_FILE_PRESUB)
				continue;

			src_regs = rc_presubtract_src_reg_count(
							inst->PreSub.Opcode);

			/* Which channel groups operand j reads depends only
			 * on j, so work it out once rather than once per
			 * presubtract input. */
			src_uses(inst->SrcReg[j], &rgb, &alpha);

			for (i = 0; i < src_regs; i++) {
				if (rgb) {
					pair->RGB.Src[i].File =
						inst->PreSub.SrcReg[i].File;
					pair->RGB.Src[i].Index =
						inst->PreSub.SrcReg[i].Index;
					pair->RGB.Src[i].Used = 1;
				}
				if (alpha) {
					pair->Alpha.Src[i].File =
						inst->PreSub.SrcReg[i].File;
					pair->Alpha.Src[i].Index =
						inst->PreSub.SrcReg[i].Index;
					pair->Alpha.Src[i].Used = 1;
				}
			}
		}
	}

	for (i = 0; i < opcode->NumSrcRegs; ++i) {
		int source;

		/* A transcendent opcode gets no RGB arguments of its own. */
		if (needrgb && !istranscendent) {
			unsigned int srcrgb = 0;
			unsigned int srcalpha = 0;
			unsigned int srcmask = 0;
			int j;
			/* We don't care about the alpha channel here.  We only
			 * want the part of the swizzle that writes to rgb,
			 * since we are creating an rgb instruction. */
			for (j = 0; j < 3; ++j) {
				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);

				if (swz < RC_SWIZZLE_W)
					srcrgb = 1;
				else if (swz == RC_SWIZZLE_W)
					srcalpha = 1;

				/* srcmask collects the x/y/z channels whose
				 * selector is below RC_SWIZZLE_UNUSED. */
				if (swz < RC_SWIZZLE_UNUSED)
					srcmask |= 1 << j;
			}
			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
							inst->SrcReg[i].File, inst->SrcReg[i].Index);
			if (source < 0) {
				rc_error(&c->Base, "Failed to translate "
							"rgb instruction.\n");
				return;
			}
			pair->RGB.Arg[i].Source = source;
			pair->RGB.Arg[i].Swizzle =
				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
			/* The per-channel negate mask collapses to one flag:
			 * set when any used x/y/z channel is negated. */
			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
		}
		if (needalpha) {
			unsigned int srcrgb = 0;
			unsigned int srcalpha = 0;
			unsigned int swz;

			/* The alpha half reads a single selector: the one
			 * returned by rc_get_scalar_src_swz() for a
			 * transcendent opcode, otherwise the w selector. */
			if (istranscendent) {
				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
			} else {
				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
			}
			if (swz < 3)
				srcrgb = 1;
			else if (swz < 4)
				srcalpha = 1;
			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
							inst->SrcReg[i].File, inst->SrcReg[i].Index);
			if (source < 0) {
				rc_error(&c->Base, "Failed to translate "
							"alpha instruction.\n");
				return;
			}
			pair->Alpha.Arg[i].Source = source;
			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
			/* For a transcendent opcode the negate flag is taken
			 * from the channels being written; otherwise only
			 * the w negate bit counts. */
			if (istranscendent) {
				pair->Alpha.Arg[i].Negate =
					!!(inst->SrcReg[i].Negate &
							inst->DstReg.WriteMask);
			} else {
				pair->Alpha.Arg[i].Negate =
					!!(inst->SrcReg[i].Negate & RC_MASK_W);
			}
		}
	}

	/* Destination handling */
	if (inst->DstReg.File == RC_FILE_OUTPUT) {
		if (inst->DstReg.Index == c->OutputDepth) {
			/* Depth output: only the w write bit is used. */
			pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
		} else {
			/* Colour output: find which of the four targets this
			 * index names. An index matching none of them leaves
			 * the output fields at zero. */
			for (i = 0; i < 4; i++) {
				if (inst->DstReg.Index == c->OutputColor[i]) {
					pair->RGB.Target = i;
					pair->Alpha.Target = i;
					pair->RGB.OutputWriteMask |=
						inst->DstReg.WriteMask & RC_MASK_XYZ;
					pair->Alpha.OutputWriteMask |=
						GET_BIT(inst->DstReg.WriteMask, 3);
					break;
				}
			}
		}
	} else {
		if (needrgb) {
			pair->RGB.DestIndex = inst->DstReg.Index;
			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
		}

		if (needalpha) {
			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
			/* The alpha destination index is only filled in when
			 * w is actually written. */
			if (pair->Alpha.WriteMask) {
				pair->Alpha.DestIndex = inst->DstReg.Index;
			}
		}
	}

	if (needrgb) {
		pair->RGB.Omod = inst->Omod;
	}
	if (needalpha) {
		pair->Alpha.Omod = inst->Omod;
	}

	if (inst->WriteALUResult) {
		pair->WriteALUResult = inst->WriteALUResult;
		pair->ALUResultCompare = inst->ALUResultCompare;
	}
}
/**
 * Report instruction features that this translation rejects.
 *
 * Two conditions are diagnosed through rc_error():
 *  - an opcode with a destination register using
 *    RC_SATURATE_MINUS_PLUS_ONE, and
 *  - any of the opcode's source operands having RelAddr set.
 *
 * Only the first problem found is reported. The function returns nothing,
 * so the caller cannot tell from the call itself whether an error occurred.
 */
static void check_opcode_support(struct r300_fragment_program_compiler *c,
	struct rc_sub_instruction *inst)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);

	if (opcode->HasDstReg &&
	    inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
		rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
		return;
	}

	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
		if (inst->SrcReg[i].RelAddr) {
			/* Adjacent literals are concatenated as-is, so only
			 * one of them may carry the separating space. */
			rc_error(&c->Base, "Fragment program does not support relative addressing "
				 "of source operands.\n");
			return;
		}
	}
}
/**
* Translate all ALU instructions into corresponding pair instructions,
* performing no other changes.
*/
void rc_pair_translate(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
inst != &c->Base.Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode;
struct rc_sub_instruction copy;
if (inst->Type != RC_INSTRUCTION_NORMAL)
continue;
opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
continue;
copy = inst->U.I;
check_opcode_support(c, &copy);
final_rewrite(&copy);
inst->Type = RC_INSTRUCTION_PAIR;
set_pair_instruction(c, &inst->U.P, &copy);
}
}