| /* |
| * Copyright (C) 2009 Nicolai Haehnle. |
| * Copyright 2011 Tom Stellard <tstellar@gmail.com> |
| * |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining |
| * a copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sublicense, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial |
| * portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
| * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| */ |
| |
| #include "radeon_program_pair.h" |
| |
| #include <stdio.h> |
| |
| #include "main/glheader.h" |
| #include "program/register_allocate.h" |
| #include "ralloc.h" |
| |
| #include "r300_fragprog_swizzle.h" |
| #include "radeon_compiler.h" |
| #include "radeon_compiler_util.h" |
| #include "radeon_dataflow.h" |
| #include "radeon_list.h" |
| #include "radeon_variable.h" |
| |
/* Compile-time switch for this file's debug output: DBG() and the
 * `if (VERBOSE)` / `#if VERBOSE` sections only do work when it is non-zero. */
#define VERBOSE 0

/* printf-style logging to stderr that is skipped unless VERBOSE is non-zero.
 * Wrapped in do { } while(0) so it behaves as a single statement. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
| |
| |
| |
/** Allocation bookkeeping for a single input or temporary register. */
struct register_info {
	/* One live interval per channel; scan_read_callback() fills the
	 * entries selected by the read mask of each input read. */
	struct live_intervals Live[4];

	/* Set by scan_read_callback() when a read of the register is seen. */
	unsigned int Used:1;
	/* Set once File/Index hold an assignment; remap_register() only
	 * substitutes Index when this bit is set. */
	unsigned int Allocated:1;
	/* Register file of the assignment (RC_FILE_TEMPORARY wherever this
	 * file writes it). */
	unsigned int File:3;
	/* Register index of the assignment. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
	/* For inputs: bit N is set when Live[N] is in use; computed in
	 * do_advanced_regalloc(). */
	unsigned int Writemask;
};
| |
/** State shared by the allocation routines and the callbacks in this file. */
struct regalloc_state {
	/* Compiler whose program is being processed. */
	struct radeon_compiler * C;

	/* Array of NumInputs entries, indexed by input register index. */
	struct register_info * Input;
	unsigned int NumInputs;

	/* Array of NumTemporaries entries, indexed by temporary register
	 * index. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/* Non-zero when the inputs-only allocation was used; in that case
	 * remap_register() rewrites temporaries as well as inputs. */
	unsigned int Simple;
	/* Largest IP of a matched loop end seen so far while scanning in
	 * do_advanced_regalloc(); scan_read_callback() extends input live
	 * intervals to at least this IP. */
	int LoopEnd;
};
| |
/**
 * Register classes known to the allocator.
 *
 * The enumerator order must stay in sync with the rc_class_list table in
 * do_advanced_regalloc(): the value returned by variable_get_class() is used
 * directly as an index into that table, and find_class() iterates up to
 * RC_REG_CLASS_COUNT.
 */
enum rc_reg_class {
	/* Classes that may list more than one writemask in rc_class_list. */
	RC_REG_CLASS_SINGLE,
	RC_REG_CLASS_DOUBLE,
	RC_REG_CLASS_TRIPLE,
	RC_REG_CLASS_ALPHA,
	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
	/* Classes bound to exactly the writemask their name spells out. */
	RC_REG_CLASS_X,
	RC_REG_CLASS_Y,
	RC_REG_CLASS_Z,
	RC_REG_CLASS_XY,
	RC_REG_CLASS_YZ,
	RC_REG_CLASS_XZ,
	RC_REG_CLASS_XW,
	RC_REG_CLASS_YW,
	RC_REG_CLASS_ZW,
	RC_REG_CLASS_XYW,
	RC_REG_CLASS_YZW,
	RC_REG_CLASS_XZW,
	/* Number of classes; not a class itself. */
	RC_REG_CLASS_COUNT
};
| |
/**
 * Description of one register class.
 *
 * do_advanced_regalloc() initializes these positionally, so the field order
 * below must not change.
 */
struct rc_class {
	/** Which class this entry describes. */
	enum rc_reg_class Class;

	/** Number of leading entries of Writemasks that are registered with
	 * the allocator for this class. */
	unsigned int WritemaskCount;

	/** This is 1 if this class is being used by the register allocator
	 * and 0 otherwise */
	unsigned int Used;

	/** This is the ID number assigned to this class by ra. */
	unsigned int Id;

	/** List of writemasks that belong to this class */
	unsigned int Writemasks[3];


};
| |
| static void print_live_intervals(struct live_intervals * src) |
| { |
| if (!src || !src->Used) { |
| DBG("(null)"); |
| return; |
| } |
| |
| DBG("(%i,%i)", src->Start, src->End); |
| } |
| |
| static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) |
| { |
| if (VERBOSE) { |
| DBG("overlap_live_intervals: "); |
| print_live_intervals(a); |
| DBG(" to "); |
| print_live_intervals(b); |
| DBG("\n"); |
| } |
| |
| if (!a->Used || !b->Used) { |
| DBG(" unused interval\n"); |
| return 0; |
| } |
| |
| if (a->Start > b->Start) { |
| if (a->Start < b->End) { |
| DBG(" overlap\n"); |
| return 1; |
| } |
| } else if (b->Start > a->Start) { |
| if (b->Start < a->End) { |
| DBG(" overlap\n"); |
| return 1; |
| } |
| } else { /* a->Start == b->Start */ |
| if (a->Start != a->End && b->Start != b->End) { |
| DBG(" overlap\n"); |
| return 1; |
| } |
| } |
| |
| DBG(" no overlap\n"); |
| |
| return 0; |
| } |
| |
| static void scan_read_callback(void * data, struct rc_instruction * inst, |
| rc_register_file file, unsigned int index, unsigned int mask) |
| { |
| struct regalloc_state * s = data; |
| struct register_info * reg; |
| unsigned int i; |
| |
| if (file != RC_FILE_INPUT) |
| return; |
| |
| s->Input[index].Used = 1; |
| reg = &s->Input[index]; |
| |
| for (i = 0; i < 4; i++) { |
| if (!((mask >> i) & 0x1)) { |
| continue; |
| } |
| reg->Live[i].Used = 1; |
| reg->Live[i].Start = 0; |
| reg->Live[i].End = |
| s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; |
| } |
| } |
| |
| static void remap_register(void * data, struct rc_instruction * inst, |
| rc_register_file * file, unsigned int * index) |
| { |
| struct regalloc_state * s = data; |
| const struct register_info * reg; |
| |
| if (*file == RC_FILE_TEMPORARY && s->Simple) |
| reg = &s->Temporary[*index]; |
| else if (*file == RC_FILE_INPUT) |
| reg = &s->Input[*index]; |
| else |
| return; |
| |
| if (reg->Allocated) { |
| *index = reg->Index; |
| } |
| } |
| |
| static void alloc_input_simple(void * data, unsigned int input, |
| unsigned int hwreg) |
| { |
| struct regalloc_state * s = data; |
| |
| if (input >= s->NumInputs) |
| return; |
| |
| s->Input[input].Allocated = 1; |
| s->Input[input].File = RC_FILE_TEMPORARY; |
| s->Input[input].Index = hwreg; |
| } |
| |
| /* This functions offsets the temporary register indices by the number |
| * of input registers, because input registers are actually temporaries and |
| * should not occupy the same space. |
| * |
| * This pass is supposed to be used to maintain correct allocation of inputs |
| * if the standard register allocation is disabled. */ |
| static void do_regalloc_inputs_only(struct regalloc_state * s) |
| { |
| for (unsigned i = 0; i < s->NumTemporaries; i++) { |
| s->Temporary[i].Allocated = 1; |
| s->Temporary[i].File = RC_FILE_TEMPORARY; |
| s->Temporary[i].Index = i + s->NumInputs; |
| } |
| } |
| |
| static unsigned int is_derivative(rc_opcode op) |
| { |
| return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); |
| } |
| |
| static int find_class( |
| struct rc_class * classes, |
| unsigned int writemask, |
| unsigned int max_writemask_count) |
| { |
| unsigned int i; |
| for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
| unsigned int j; |
| if (classes[i].WritemaskCount > max_writemask_count) { |
| continue; |
| } |
| for (j = 0; j < 3; j++) { |
| if (classes[i].Writemasks[j] == writemask) { |
| return i; |
| } |
| } |
| } |
| return -1; |
| } |
| |
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/* Flag owned by the caller; the callback writes 0 through this
	 * pointer when a converted swizzle is not natively supported. */
	unsigned int * can_change_writemask;
	/* Conversion swizzle applied to each argument swizzle via
	 * rc_adjust_channels() before the native-swizzle check. */
	unsigned int conversion_swizzle;
};
| |
| static void variable_get_class_read_cb( |
| void * userdata, |
| struct rc_instruction * inst, |
| struct rc_pair_instruction_arg * arg, |
| struct rc_pair_instruction_source * src) |
| { |
| struct variable_get_class_cb_data * d = userdata; |
| unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, |
| d->conversion_swizzle); |
| if (!r300_swizzle_is_native_basic(new_swizzle)) { |
| *d->can_change_writemask = 0; |
| } |
| } |
| |
| static enum rc_reg_class variable_get_class( |
| struct rc_variable * variable, |
| struct rc_class * classes) |
| { |
| unsigned int i; |
| unsigned int can_change_writemask= 1; |
| unsigned int writemask = rc_variable_writemask_sum(variable); |
| struct rc_list * readers = rc_variable_readers_union(variable); |
| int class_index; |
| |
| if (!variable->C->is_r500) { |
| struct rc_class c; |
| struct rc_variable * var_ptr; |
| /* The assumption here is that if an instruction has type |
| * RC_INSTRUCTION_NORMAL then it is a TEX instruction. |
| * r300 and r400 can't swizzle the result of a TEX lookup. */ |
| for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { |
| if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { |
| writemask = RC_MASK_XYZW; |
| } |
| } |
| |
| /* Check if it is possible to do swizzle packing for r300/r400 |
| * without creating non-native swizzles. */ |
| class_index = find_class(classes, writemask, 3); |
| if (class_index < 0) { |
| goto error; |
| } |
| c = classes[class_index]; |
| if (c.WritemaskCount == 1) { |
| goto done; |
| } |
| for (i = 0; i < c.WritemaskCount; i++) { |
| struct rc_variable * var_ptr; |
| for (var_ptr = variable; var_ptr; |
| var_ptr = var_ptr->Friend) { |
| int j; |
| unsigned int conversion_swizzle = |
| rc_make_conversion_swizzle( |
| writemask, c.Writemasks[i]); |
| struct variable_get_class_cb_data d; |
| d.can_change_writemask = &can_change_writemask; |
| d.conversion_swizzle = conversion_swizzle; |
| /* If we get this far var_ptr->Inst has to |
| * be a pair instruction. If variable or any |
| * of its friends are normal instructions, |
| * then the writemask will be set to RC_MASK_XYZW |
| * and the function will return before it gets |
| * here. */ |
| rc_pair_for_all_reads_arg(var_ptr->Inst, |
| variable_get_class_read_cb, &d); |
| |
| for (j = 0; j < var_ptr->ReaderCount; j++) { |
| unsigned int old_swizzle; |
| unsigned int new_swizzle; |
| struct rc_reader r = var_ptr->Readers[j]; |
| if (r.Inst->Type == |
| RC_INSTRUCTION_PAIR ) { |
| old_swizzle = r.U.P.Arg->Swizzle; |
| } else { |
| old_swizzle = r.U.I.Src->Swizzle; |
| } |
| new_swizzle = rc_adjust_channels( |
| old_swizzle, conversion_swizzle); |
| if (!r300_swizzle_is_native_basic( |
| new_swizzle)) { |
| can_change_writemask = 0; |
| break; |
| } |
| } |
| if (!can_change_writemask) { |
| break; |
| } |
| } |
| if (!can_change_writemask) { |
| break; |
| } |
| } |
| } |
| |
| if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { |
| /* DDX/DDY seem to always fail when their writemasks are |
| * changed.*/ |
| if (is_derivative(variable->Inst->U.P.RGB.Opcode) |
| || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { |
| can_change_writemask = 0; |
| } |
| } |
| for ( ; readers; readers = readers->Next) { |
| struct rc_reader * r = readers->Item; |
| if (r->Inst->Type == RC_INSTRUCTION_PAIR) { |
| if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { |
| can_change_writemask = 0; |
| break; |
| } |
| /* DDX/DDY also fail when their swizzles are changed. */ |
| if (is_derivative(r->Inst->U.P.RGB.Opcode) |
| || is_derivative(r->Inst->U.P.Alpha.Opcode)) { |
| can_change_writemask = 0; |
| break; |
| } |
| } |
| } |
| |
| class_index = find_class(classes, writemask, |
| can_change_writemask ? 3 : 1); |
| done: |
| if (class_index > -1) { |
| return classes[class_index].Class; |
| } else { |
| error: |
| rc_error(variable->C, |
| "Could not find class for index=%u mask=%u\n", |
| variable->Dst.Index, writemask); |
| return 0; |
| } |
| } |
| |
| static unsigned int overlap_live_intervals_array( |
| struct live_intervals * a, |
| struct live_intervals * b) |
| { |
| unsigned int a_chan, b_chan; |
| for (a_chan = 0; a_chan < 4; a_chan++) { |
| for (b_chan = 0; b_chan < 4; b_chan++) { |
| if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { |
| return 1; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| static unsigned int reg_get_index(int reg) |
| { |
| return reg / RC_MASK_XYZW; |
| } |
| |
| static unsigned int reg_get_writemask(int reg) |
| { |
| return (reg % RC_MASK_XYZW) + 1; |
| } |
| |
| static int get_reg_id(unsigned int index, unsigned int writemask) |
| { |
| assert(writemask); |
| if (writemask == 0) { |
| return 0; |
| } |
| return (index * RC_MASK_XYZW) + (writemask - 1); |
| } |
| |
#if VERBOSE
/** Debug helper: print a register id as "Temp[index].xyzw", with '_' for
 * every channel missing from its writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
| |
| static void add_register_conflicts( |
| struct ra_regs * regs, |
| unsigned int max_temp_regs) |
| { |
| unsigned int index, a_mask, b_mask; |
| for (index = 0; index < max_temp_regs; index++) { |
| for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { |
| for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; |
| b_mask++) { |
| if (a_mask & b_mask) { |
| ra_add_reg_conflict(regs, |
| get_reg_id(index, a_mask), |
| get_reg_id(index, b_mask)); |
| } |
| } |
| } |
| } |
| } |
| |
| static void do_advanced_regalloc(struct regalloc_state * s) |
| { |
| struct rc_class rc_class_list [] = { |
| {RC_REG_CLASS_SINGLE, 3, 0, 0, |
| {RC_MASK_X, |
| RC_MASK_Y, |
| RC_MASK_Z}}, |
| {RC_REG_CLASS_DOUBLE, 3, 0, 0, |
| {RC_MASK_X | RC_MASK_Y, |
| RC_MASK_X | RC_MASK_Z, |
| RC_MASK_Y | RC_MASK_Z}}, |
| {RC_REG_CLASS_TRIPLE, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_ALPHA, 1, 0, 0, |
| {RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, |
| {RC_MASK_X | RC_MASK_W, |
| RC_MASK_Y | RC_MASK_W, |
| RC_MASK_Z | RC_MASK_W}}, |
| {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, |
| {RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
| RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
| RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, |
| {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_X, 1, 0, 0, |
| {RC_MASK_X, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_Y, 1, 0, 0, |
| {RC_MASK_Y, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_Z, 1, 0, 0, |
| {RC_MASK_Z, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_XY, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Y, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_YZ, 1, 0, 0, |
| {RC_MASK_Y | RC_MASK_Z, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_XZ, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Z, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_XW, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_YW, 1, 0, 0, |
| {RC_MASK_Y | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_ZW, 1, 0, 0, |
| {RC_MASK_Z | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_XYW, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_YZW, 1, 0, 0, |
| {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}}, |
| {RC_REG_CLASS_XZW, 1, 0, 0, |
| {RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
| RC_MASK_NONE, |
| RC_MASK_NONE}} |
| }; |
| |
| unsigned int i, j, index, input_node, node_count, node_index; |
| unsigned int * node_classes; |
| unsigned int * input_classes; |
| struct rc_instruction * inst; |
| struct rc_list * var_ptr; |
| struct rc_list * variables; |
| struct ra_regs * regs; |
| struct ra_graph * graph; |
| |
| /* Allocate the main ra data structure */ |
| regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW); |
| |
| /* Get list of program variables */ |
| variables = rc_get_variables(s->C); |
| node_count = rc_list_count(variables); |
| node_classes = memory_pool_malloc(&s->C->Pool, |
| node_count * sizeof(unsigned int)); |
| input_classes = memory_pool_malloc(&s->C->Pool, |
| s->NumInputs * sizeof(unsigned int)); |
| |
| for (var_ptr = variables, node_index = 0; var_ptr; |
| var_ptr = var_ptr->Next, node_index++) { |
| unsigned int class_index; |
| /* Compute the live intervals */ |
| rc_variable_compute_live_intervals(var_ptr->Item); |
| |
| class_index = variable_get_class(var_ptr->Item, rc_class_list); |
| |
| /* If we haven't used this register class yet, mark it |
| * as used and allocate space for it. */ |
| if (!rc_class_list[class_index].Used) { |
| rc_class_list[class_index].Used = 1; |
| rc_class_list[class_index].Id = ra_alloc_reg_class(regs); |
| } |
| |
| node_classes[node_index] = rc_class_list[class_index].Id; |
| } |
| |
| |
| /* Assign registers to the classes */ |
| for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
| struct rc_class class = rc_class_list[i]; |
| if (!class.Used) { |
| continue; |
| } |
| |
| for (index = 0; index < s->C->max_temp_regs; index++) { |
| for (j = 0; j < class.WritemaskCount; j++) { |
| int reg_id = get_reg_id(index, |
| class.Writemasks[j]); |
| ra_class_add_reg(regs, class.Id, reg_id); |
| } |
| } |
| } |
| |
| /* Add register conflicts */ |
| add_register_conflicts(regs, s->C->max_temp_regs); |
| |
| /* Calculate live intervals for input registers */ |
| for (inst = s->C->Program.Instructions.Next; |
| inst != &s->C->Program.Instructions; |
| inst = inst->Next) { |
| rc_opcode op = rc_get_flow_control_inst(inst); |
| if (op == RC_OPCODE_BGNLOOP) { |
| struct rc_instruction * endloop = |
| rc_match_bgnloop(inst); |
| if (endloop->IP > s->LoopEnd) { |
| s->LoopEnd = endloop->IP; |
| } |
| } |
| rc_for_all_reads_mask(inst, scan_read_callback, s); |
| } |
| |
| /* Create classes for input registers */ |
| for (i = 0; i < s->NumInputs; i++) { |
| unsigned int chan, class_id, writemask = 0; |
| for (chan = 0; chan < 4; chan++) { |
| if (s->Input[i].Live[chan].Used) { |
| writemask |= (1 << chan); |
| } |
| } |
| s->Input[i].Writemask = writemask; |
| if (!writemask) { |
| continue; |
| } |
| |
| class_id = ra_alloc_reg_class(regs); |
| input_classes[i] = class_id; |
| ra_class_add_reg(regs, class_id, |
| get_reg_id(s->Input[i].Index, writemask)); |
| } |
| |
| ra_set_finalize(regs); |
| |
| graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); |
| |
| /* Build the interference graph */ |
| for (var_ptr = variables, node_index = 0; var_ptr; |
| var_ptr = var_ptr->Next,node_index++) { |
| struct rc_list * a, * b; |
| unsigned int b_index; |
| |
| ra_set_node_class(graph, node_index, node_classes[node_index]); |
| |
| for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; |
| b; b = b->Next, b_index++) { |
| struct rc_variable * var_a = a->Item; |
| while (var_a) { |
| struct rc_variable * var_b = b->Item; |
| while (var_b) { |
| if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { |
| ra_add_node_interference(graph, |
| node_index, b_index); |
| } |
| var_b = var_b->Friend; |
| } |
| var_a = var_a->Friend; |
| } |
| } |
| } |
| |
| /* Add input registers to the interference graph */ |
| for (i = 0, input_node = 0; i< s->NumInputs; i++) { |
| if (!s->Input[i].Writemask) { |
| continue; |
| } |
| ra_set_node_class(graph, node_count + input_node, |
| input_classes[i]); |
| for (var_ptr = variables, node_index = 0; |
| var_ptr; var_ptr = var_ptr->Next, node_index++) { |
| struct rc_variable * var = var_ptr->Item; |
| if (overlap_live_intervals_array(s->Input[i].Live, |
| var->Live)) { |
| ra_add_node_interference(graph, node_index, |
| node_count + input_node); |
| } |
| } |
| /* Manually allocate a register for this input */ |
| ra_set_node_reg(graph, node_count + input_node, get_reg_id( |
| s->Input[i].Index, s->Input[i].Writemask)); |
| input_node++; |
| } |
| |
| if (!ra_allocate_no_spills(graph)) { |
| rc_error(s->C, "Ran out of hardware temporaries\n"); |
| return; |
| } |
| |
| /* Rewrite the registers */ |
| for (var_ptr = variables, node_index = 0; var_ptr; |
| var_ptr = var_ptr->Next, node_index++) { |
| int reg = ra_get_node_reg(graph, node_index); |
| unsigned int writemask = reg_get_writemask(reg); |
| unsigned int index = reg_get_index(reg); |
| struct rc_variable * var = var_ptr->Item; |
| |
| if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { |
| writemask = rc_variable_writemask_sum(var); |
| } |
| |
| if (var->Dst.File == RC_FILE_INPUT) { |
| continue; |
| } |
| rc_variable_change_dst(var, index, writemask); |
| } |
| |
| ralloc_free(graph); |
| ralloc_free(regs); |
| } |
| |
| /** |
| * @param user This parameter should be a pointer to an integer value. If this |
| * integer value is zero, then a simple register allocator will be used that |
| * only allocates space for input registers (\sa do_regalloc_inputs_only). If |
| * user is non-zero, then the regular register allocator will be used |
| * (\sa do_regalloc). |
| */ |
| void rc_pair_regalloc(struct radeon_compiler *cc, void *user) |
| { |
| struct r300_fragment_program_compiler *c = |
| (struct r300_fragment_program_compiler*)cc; |
| struct regalloc_state s; |
| int * do_full_regalloc = (int*)user; |
| |
| memset(&s, 0, sizeof(s)); |
| s.C = cc; |
| s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; |
| s.Input = memory_pool_malloc(&cc->Pool, |
| s.NumInputs * sizeof(struct register_info)); |
| memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); |
| |
| s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; |
| s.Temporary = memory_pool_malloc(&cc->Pool, |
| s.NumTemporaries * sizeof(struct register_info)); |
| memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); |
| |
| rc_recompute_ips(s.C); |
| |
| c->AllocateHwInputs(c, &alloc_input_simple, &s); |
| if (*do_full_regalloc) { |
| do_advanced_regalloc(&s); |
| } else { |
| s.Simple = 1; |
| do_regalloc_inputs_only(&s); |
| } |
| |
| /* Rewrite inputs and if we are doing the simple allocation, rewrite |
| * temporaries too. */ |
| for (struct rc_instruction *inst = s.C->Program.Instructions.Next; |
| inst != &s.C->Program.Instructions; |
| inst = inst->Next) { |
| rc_remap_registers(inst, &remap_register, &s); |
| } |
| } |