blob: e54bb7b9f4d8b412d63f6faa35a7178a5b0275ef [file] [log] [blame]
/*
* Copyright © 2017 Gert Wollny
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* A short overview on how the array merging works:
*
* Inputs:
* - per array information: live range, access mask, size
* - the program
*
* Output:
* - the program with updated array addressing
*
* Pseudo algorithm:
*
* repeat
* for all pairs of arrays:
* if they have non-overlapping live ranges and equal access masks:
* - pick shorter array
* - merge its live range into the longer array
* - set its merge target array to the longer array
* - mark the shorter array as processed
*
* for all pairs of arrays:
* if they have overlapping live ranges use in sum at most four components:
* - pick shorter array
* - evaluate reswizzle map to move its components into the components
* that are not used by the longer array
* - set its merge target array to the longer array
* - mark the shorter array as processed
* - bail out loop
* until no more successfull merges were found
*
* for all pairs of arrays:
* if they have non-overlapping live ranges:
* - pick shorter array
* - merge its live range into the longer array
* - set its merge target array to the longer array
* - mark the shorter array as processed
*
* Finalize remapping map so that target arrays are always final, i.e. have
* themselfes no merge target set.
*
* Example:
* ID | Length | Live range | access mask | target id | reswizzle
* ================================================================
* 1 3 3-10 x___ 0 ____
* 2 4 13-20 x___ 0 ____
* 3 8 3-20 x___ 0 ____
* 4 6 21-40 xy__ 0 ____
* 5 7 12-30 xy__ 0 ____
*
* 1. merge live ranges 1 and 2
*
* ID | Length | Live range | access mask | target id | reswizzle
* ================================================================
* 1 - - x___ 2 ____
* 2 4 3-20 x___ 0 ____
* 3 8 3-20 x___ 0 ____
* 4 6 21-40 xy__ 0 ____
* 5 7 12-30 xy__ 0 ____
*
*
* 3. interleave 2 and 3
*
* ID | Length | Live range | access mask | target id | reswizzle
* ================================================================
* 1 - - x___ 2 ____
* 2 - - x___ 3 _x__
* 3 8 3-20 xy__ 0 ____
* 4 6 21-40 xy__ 0 ____
* 5 7 12-30 xy__ 0 ____
*
* 3. merge live ranges 3 and 4
*
* ID | Length | Live range | access mask | target id | reswizzle
* ================================================================
* 1 - - x___ 2 ____
* 2 - - x___ 3 _x__
* 3 8 3-40 xy__ 0 ____
* 4 - - xy__ 3 ____
* 5 7 3-21 xy__ 0 ____
*
* 4. interleave 3 and 5
*
* ID | Length | Live range | access mask | target id | reswizzle
* ================================================================
* 1 - - x___ 2 ____
* 2 - - x___ 3 _x__
* 3 8 3-40 xy__ 0 ____
* 4 - - xy__ 3 ____
* 5 - - xy__ 3 __xy
*
* 5. finalize remapping
* (Array 1 has been merged with 2 that was later interleaved, so
* the reswizzeling must be propagated.
*
* ID | Length | Live range | new access mask | target id | reswizzle
* ================================================================
* 1 - - _y__ 3 _x__
* 2 - - _y__ 3 _x__
* 3 8 3-40 xy__ 0 ____
* 4 - - xy__ 3 ____
* 5 - - __zw 3 __xy
*
*/
#include "program/prog_instruction.h"
#include "util/u_math.h"
#include <ostream>
#include <cassert>
#include <algorithm>
#include <iostream>
#include "st_glsl_to_tgsi_array_merge.h"
#if __cplusplus >= 201402L
#include <memory>
using std::unique_ptr;
using std::make_unique;
#endif
#define ARRAY_MERGE_DEBUG 0
#if ARRAY_MERGE_DEBUG > 0
#define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0)
#define ARRAY_MERGE_DUMP_BLOCK(x) do { x } while (0)
#else
#define ARRAY_MERGE_DUMP(x)
#define ARRAY_MERGE_DUMP_BLOCK(x)
#endif
static const char xyzw[] = "xyzw";
array_live_range::array_live_range():
id(0),
length(0),
first_access(0),
last_access(0),
component_access_mask(0),
used_component_count(0),
target_array(nullptr)
{
init_swizzles();
}
array_live_range::array_live_range(unsigned aid, unsigned alength):
id(aid),
length(alength),
first_access(0),
last_access(0),
component_access_mask(0),
used_component_count(0),
target_array(nullptr)
{
init_swizzles();
}
array_live_range::array_live_range(unsigned aid, unsigned alength, int begin,
int end, int sw):
id(aid),
length(alength),
first_access(begin),
last_access(end),
component_access_mask(sw),
used_component_count(util_bitcount(sw)),
target_array(nullptr)
{
init_swizzles();
}
void array_live_range::init_swizzles()
{
for (int i = 0; i < 4; ++i)
swizzle_map[i] = i;
}
void array_live_range::set_live_range(int _begin, int _end)
{
set_begin(_begin);
set_end(_end);
}
void array_live_range::set_access_mask(int mask)
{
component_access_mask = mask;
used_component_count = util_bitcount(mask);
}
void array_live_range::merge(array_live_range *a, array_live_range *b)
{
if (a->array_length() < b->array_length())
b->merge_live_range_from(a);
else
a->merge_live_range_from(b);
}
void array_live_range::interleave(array_live_range *a, array_live_range *b)
{
if (a->array_length() < b->array_length())
a->interleave_into(b);
else
b->interleave_into(a);
}
void array_live_range::interleave_into(array_live_range *other)
{
for (int i = 0; i < 4; ++i) {
swizzle_map[i] = -1;
}
int trgt_access_mask = other->access_mask();
int summary_access_mask = trgt_access_mask;
int src_swizzle_bit = 1;
int next_free_swizzle_bit = 1;
int k = 0;
unsigned i;
unsigned last_src_bit = util_last_bit(component_access_mask);
for (i = 0; i <= last_src_bit ; ++i, src_swizzle_bit <<= 1) {
/* Jump over empty src component slots (e.g. x__w). This is just a
* safety measure and it is tested for, but it is very likely that the
* emitted code always uses slots staring from x without leaving holes
* (i.e. always xy__ not x_z_ or _yz_ etc).
*/
if (!(src_swizzle_bit & component_access_mask))
continue;
/* Find the next free access slot in the target. */
while ((trgt_access_mask & next_free_swizzle_bit) &&
k < 4) {
next_free_swizzle_bit <<= 1;
++k;
}
assert(k < 4 &&
"Interleaved array would have more then four components");
/* Set the mapping for this component. */
swizzle_map[i] = k;
trgt_access_mask |= next_free_swizzle_bit;
/* Update the joined access mask if we didn't just fill the mapping.*/
if (src_swizzle_bit & component_access_mask)
summary_access_mask |= next_free_swizzle_bit;
}
other->set_access_mask(summary_access_mask);
other->merge_live_range_from(this);
ARRAY_MERGE_DUMP_BLOCK(
std::cerr << "Interleave " << id << " into " << other->id << ", swz:";
for (unsigned i = 0; i < 4; ++i) {
std::cerr << ((swizzle_map[i] >= 0) ? xyzw[swizzle_map[i]] : '_');
}
std::cerr << '\n';
);
}
void array_live_range::merge_live_range_from(array_live_range *other)
{
other->set_target(this);
if (other->begin() < first_access)
first_access = other->begin();
if (other->end() > last_access)
last_access = other->end();
}
int8_t array_live_range::remap_one_swizzle(int8_t idx) const
{
// needs testing
if (target_array) {
idx = swizzle_map[idx];
if (idx >= 0)
idx = target_array->remap_one_swizzle(idx);
}
return idx;
}
void array_live_range::set_target(array_live_range *target)
{
target_array = target;
}
void array_live_range::print(std::ostream& os) const
{
os << "[id:" << id
<< ", length:" << length
<< ", (b:" << first_access
<< ", e:" << last_access
<< "), sw:" << (int)component_access_mask
<< ", nc:" << (int)used_component_count
<< "]";
}
bool array_live_range::time_doesnt_overlap(const array_live_range& other) const
{
return (other.last_access < first_access ||
last_access < other.first_access);
}
namespace tgsi_array_merge {
array_remapping::array_remapping():
target_id(0)
{
for (int i = 0; i < 4; ++i) {
read_swizzle_map[i] = i;
}
}
array_remapping::array_remapping(int trgt_array_id, const int8_t swizzle[]):
target_id(trgt_array_id)
{
for (int i = 0; i < 4; ++i) {
read_swizzle_map[i] = swizzle[i];
}
}
void array_remapping::init_from(const array_live_range& range)
{
target_id = range.is_mapped() ? range.final_target()->array_id(): 0;
for (int i = 0; i < 4; ++i)
read_swizzle_map[i] = range.remap_one_swizzle(i);
}
int array_remapping::map_writemask(int write_mask) const
{
assert(is_valid());
int result_write_mask = 0;
for (int i = 0; i < 4; ++i) {
if (1 << i & write_mask) {
assert(read_swizzle_map[i] >= 0);
result_write_mask |= 1 << read_swizzle_map[i];
}
}
return result_write_mask;
}
uint16_t array_remapping::move_read_swizzles(uint16_t original_swizzle) const
{
assert(is_valid());
/* Since
*
* dst.zw = src.xy in glsl actually is MOV dst.__zw src.__xy
*
* when interleaving the arrays the source swizzles must be moved
* according to the changed dst write mask.
*/
uint16_t out_swizzle = 0;
for (int idx = 0; idx < 4; ++idx) {
uint16_t orig_swz = GET_SWZ(original_swizzle, idx);
int new_idx = read_swizzle_map[idx];
if (new_idx >= 0)
out_swizzle |= orig_swz << 3 * new_idx;
}
return out_swizzle;
}
uint16_t array_remapping::map_swizzles(uint16_t old_swizzle) const
{
uint16_t out_swizzle = 0;
for (int idx = 0; idx < 4; ++idx) {
uint16_t swz = read_swizzle_map[GET_SWZ(old_swizzle, idx)];
out_swizzle |= swz << 3 * idx;
}
return out_swizzle;
}
void array_remapping::print(std::ostream& os) const
{
if (is_valid()) {
os << "[aid: " << target_id << " swz: ";
for (int i = 0; i < 4; ++i)
os << (read_swizzle_map[i] >= 0 ? xyzw[read_swizzle_map[i]] : '_');
os << "]";
} else {
os << "[unused]";
}
}
/* Required by the unit tests */
bool operator == (const array_remapping& lhs, const array_remapping& rhs)
{
if (lhs.target_id != rhs.target_id)
return false;
if (lhs.target_id == 0)
return true;
for (int i = 0; i < 4; ++i) {
if (lhs.read_swizzle_map[i] != rhs.read_swizzle_map[i])
return false;
}
return true;
}
static
bool sort_by_begin(const array_live_range& lhs, const array_live_range& rhs) {
return lhs.begin() < rhs.begin();
}
/* Helper class to evaluate merging and interleaving of arrays */
class array_merge_evaluator {
public:
typedef int (*array_merger)(array_live_range& range_1,
array_live_range& range_2);
array_merge_evaluator(int _narrays, array_live_range *_ranges,
bool _restart);
/** Run the merge strategy on all arrays
* @returns number of successfull merges
*/
int run();
private:
virtual int do_run(array_live_range& range_1, array_live_range& range_2) = 0;
int narrays;
array_live_range *ranges;
bool restart;
};
array_merge_evaluator::array_merge_evaluator(int _narrays,
array_live_range *_ranges,
bool _restart):
narrays(_narrays),
ranges(_ranges),
restart(_restart)
{
}
int array_merge_evaluator::run()
{
int remaps = 0;
for (int i = 0; i < narrays; ++i) {
if (ranges[i].is_mapped())
continue;
for (int j = i + 1; j < narrays; ++j) {
if (!ranges[j].is_mapped()) {
ARRAY_MERGE_DUMP("try merge " << i << " id:" << ranges[i].array_id()
<< " and " << j << " id: "<< ranges[j].array_id()
<< "\n");
int n = do_run(ranges[i], ranges[j]);
if (restart && n)
return n;
remaps += n;
}
}
}
return remaps;
}
/* Merge live ranges if possible at all */
class merge_live_range_always: public array_merge_evaluator {
public:
merge_live_range_always(int _narrays, array_live_range *_ranges):
array_merge_evaluator(_narrays, _ranges, false) {
}
protected:
int do_run(array_live_range& range_1, array_live_range& range_2){
if (range_2.time_doesnt_overlap(range_1)) {
ARRAY_MERGE_DUMP("merge " << range_2 << " into " << range_1 << "\n");
array_live_range::merge(&range_1,&range_2);
return 1;
}
return 0;
}
};
/* Merge live ranges only if they use the same swizzle */
class merge_live_range_equal_swizzle: public merge_live_range_always {
public:
merge_live_range_equal_swizzle(int _narrays, array_live_range *_ranges):
merge_live_range_always(_narrays, _ranges) {
}
private:
int do_run(array_live_range& range_1, array_live_range& range_2){
if (range_1.access_mask() == range_2.access_mask()) {
return merge_live_range_always::do_run(range_1, range_2);
}
return 0;
}
};
/* Interleave arrays if possible */
class interleave_live_range: public array_merge_evaluator {
public:
interleave_live_range(int _narrays, array_live_range *_ranges):
array_merge_evaluator(_narrays, _ranges, true) {
}
private:
int do_run(array_live_range& range_1, array_live_range& range_2){
if ((range_2.used_components() + range_1.used_components() <= 4) &&
!range_1.time_doesnt_overlap(range_2)) {
ARRAY_MERGE_DUMP("Interleave " << range_2 << " into " << range_1 << "\n");
array_live_range::interleave(&range_1, &range_2);
return 1;
}
return 0;
}
};
/* Estimate the array merging: First in a loop, arrays with equal access mask
* are merged, then interleave arrays that together use at most four components,
* and have overlapping live ranges. Finally arrays are merged regardless of
* access mask.
* @param[in] narrays number of arrays
* @param[in,out] alt array life times, the merge target life time will be
* updated with the new life time.
* @param[in,out] remapping track the arraay index remapping and reswizzeling.
* @returns number of merged arrays
*/
bool get_array_remapping(int narrays, array_live_range *ranges,
array_remapping *remapping)
{
int total_remapped = 0;
int n_remapped;
/* Sort by "begin of live range" so that we don't have to restart searching
* after every merge.
*/
std::sort(ranges, ranges + narrays, sort_by_begin);
merge_live_range_equal_swizzle merge_evaluator_es(narrays, ranges);
interleave_live_range interleave_lr(narrays, ranges);
do {
n_remapped = merge_evaluator_es.run();
/* try only one array interleave, if successfull, another
* live_range merge is tried. The test MergeAndInterleave5
* (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp)
* shows that this can result in more arrays being merged/interleaved.
*/
n_remapped += interleave_lr.run();
total_remapped += n_remapped;
ARRAY_MERGE_DUMP("Remapped " << n_remapped << " arrays\n");
} while (n_remapped > 0);
total_remapped += merge_live_range_always(narrays, ranges).run();
ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped << " arrays\n");
/* Resolve the remapping chain */
for (int i = 1; i <= narrays; ++i) {
ARRAY_MERGE_DUMP("Map " << i << ":");
remapping[ranges[i-1].array_id()].init_from(ranges[i-1]);
}
return total_remapped > 0;
}
/* Remap the arrays in a TGSI program according to the given mapping.
* @param narrays number of arrays
* @param array_sizes array of arrays sizes
* @param map the array remapping information
* @param instructions TGSI program
* @returns number of arrays after remapping
*/
int remap_arrays(int narrays, unsigned *array_sizes,
exec_list *instructions,
array_remapping *map)
{
/* re-calculate arrays */
#if __cplusplus < 201402L
int *idx_map = new int[narrays + 1];
unsigned *old_sizes = new unsigned[narrays];
#else
unique_ptr<int[]> idx_map = make_unique<int[]>(narrays + 1);
unique_ptr<unsigned[]> old_sizes = make_unique<unsigned[]>(narrays);
#endif
memcpy(&old_sizes[0], &array_sizes[0], sizeof(unsigned) * narrays);
/* Evaluate mapping for the array indices and update array sizes */
int new_narrays = 0;
for (int i = 1; i <= narrays; ++i) {
if (!map[i].is_valid()) {
++new_narrays;
array_sizes[new_narrays-1] = old_sizes[i-1];
idx_map[i] = new_narrays;
}
}
/* Map the array ids of merged arrays. */
for (int i = 1; i <= narrays; ++i) {
if (map[i].is_valid()) {
map[i].set_target_id(idx_map[map[i].target_array_id()]);
}
}
/* Map the array ids of merge targets that got only renumbered. */
for (int i = 1; i <= narrays; ++i) {
if (!map[i].is_valid()) {
map[i].set_target_id(idx_map[i]);
}
}
/* Update the array ids and swizzles in the registers */
foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
st_src_reg& src = inst->src[j];
if (src.file == PROGRAM_ARRAY && src.array_id > 0) {
array_remapping& m = map[src.array_id];
if (m.is_valid()) {
src.array_id = m.target_array_id();
src.swizzle = m.map_swizzles(src.swizzle);
}
}
}
for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
st_src_reg& src = inst->tex_offsets[j];
if (src.file == PROGRAM_ARRAY && src.array_id > 0) {
array_remapping& m = map[src.array_id];
if (m.is_valid()) {
src.array_id = m.target_array_id();
src.swizzle = m.map_swizzles(src.swizzle);
}
}
}
for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
st_dst_reg& dst = inst->dst[j];
if (dst.file == PROGRAM_ARRAY && dst.array_id > 0) {
array_remapping& m = map[dst.array_id];
if (m.is_valid()) {
assert(j == 0 &&
"remapping can only be done for single dest ops");
dst.array_id = m.target_array_id();
dst.writemask = m.map_writemask(dst.writemask);
/* If the target component is moved, then the source swizzles
* must be moved accordingly.
*/
for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
st_src_reg& src = inst->src[j];
src.swizzle = m.move_read_swizzles(src.swizzle);
}
}
}
}
st_src_reg& res = inst->resource;
if (res.file == PROGRAM_ARRAY && res.array_id > 0) {
array_remapping& m = map[res.array_id];
if (m.is_valid()) {
res.array_id = m.target_array_id();
res.swizzle = m.map_swizzles(res.swizzle);
}
}
}
#if __cplusplus < 201402L
delete[] old_sizes;
delete[] idx_map;
#endif
return new_narrays;
}
}
using namespace tgsi_array_merge;
int merge_arrays(int narrays,
unsigned *array_sizes,
exec_list *instructions,
class array_live_range *arr_live_ranges)
{
array_remapping *map= new array_remapping[narrays + 1];
if (get_array_remapping(narrays, arr_live_ranges, map))
narrays = remap_arrays(narrays, array_sizes, instructions, map);
delete[] map;
return narrays;
}