| /* |
| * Copyright (C) 2014 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /* |
| * Included by texcompress_bptc and gallium to define BPTC decoding routines. |
| */ |
| |
| #ifndef TEXCOMPRESS_BPTC_TMP_H |
| #define TEXCOMPRESS_BPTC_TMP_H |
| |
| #include "util/format_srgb.h" |
| #include "util/half_float.h" |
| #include "macros.h" |
| |
/* Width and height, in texels, of one compressed block. */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and anchor-index row. */
#define N_PARTITIONS 64
/* Size, in bytes, of one compressed block in the source data. */
#define BLOCK_BYTES 16
| |
/*
 * Layout description of one UNORM block mode. The entries of
 * bptc_unorm_modes are written as positional initializers, so the field
 * order here must not change.
 */
struct bptc_unorm_mode {
   /* Number of subsets the block is split into (1, 2 or 3). */
   int n_subsets;
   /* Width of the partition-number field; 0 when there is none. */
   int n_partition_bits;
   /* True when a 2-bit rotation field follows the partition number. */
   bool has_rotation_bits;
   /* True when a 1-bit index-selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of each endpoint (before p-bits). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* True when every endpoint carries its own p-bit. */
   bool has_endpoint_pbits;
   /* True when the two endpoints of a subset share a single p-bit. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index list. */
   int n_index_bits;
   /* Bits per texel in the secondary index list; 0 when there is none. */
   int n_secondary_index_bits;
};
| |
/*
 * Describes one run of bits in a float block and where those bits land in
 * the endpoint array. Lists of these are terminated by an entry whose
 * endpoint is -1.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index, or -1 to terminate the list. */
   int8_t endpoint;
   /* Destination colour component (index into the second dimension). */
   uint8_t component;
   /* Bit position within the destination value where the field is placed. */
   uint8_t offset;
   /* Number of bits consumed from the block for this field. */
   uint8_t n_bits;
   /* True when the bits are stored in reversed order and must be flipped. */
   bool reverse;
};
| |
/*
 * Layout description of one float block mode. The entries of
 * bptc_float_modes are written as positional initializers, so the field
 * order here must not change.
 */
struct bptc_float_mode {
   /* True for mode numbers that must not be decoded. */
   bool reserved;
   /* True when endpoints after the first are stored as signed deltas from
    * endpoint 0.
    */
   bool transformed_endpoints;
   /* Width of the partition-number field; non-zero selects two subsets
    * (four endpoints), zero selects one subset (two endpoints).
    */
   int n_partition_bits;
   /* Precision, in bits, of the endpoint values before unquantization. */
   int n_endpoint_bits;
   /* Bits per texel in the index list. */
   int n_index_bits;
   /* Per-component width of the delta values, used to sign-extend them. */
   int n_delta_bits[3];
   /* Bit layout of the endpoint data, terminated by endpoint == -1. */
   struct bptc_float_bitfield bitfields[24];
};
| |
/*
 * State for writing a bit stream. Nothing in the visible part of this file
 * uses this structure, so the field descriptions below are inferred from
 * the names only -- TODO confirm against the code that uses it.
 */
struct bit_writer {
   /* Presumably the partially filled byte being assembled. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably the next output byte to be written. */
   uint8_t *dst;
};
| |
/*
 * One entry per UNORM block mode; the decoders index this table with
 * (mode_num - 1), where mode_num is ffs() of the first block byte.
 *
 * Columns, in order: n_subsets, n_partition_bits, has_rotation_bits,
 * has_index_selection_bit, n_color_bits, n_alpha_bits, has_endpoint_pbits,
 * has_shared_pbits, n_index_bits, n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
   /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
};
| |
/*
 * One entry per float block mode, indexed by the mode number the float
 * decoders derive from the first block byte. The comment above each entry
 * is the mode's bit pattern.
 *
 * Entry layout: { reserved, transformed_endpoints, n_partition_bits,
 * n_endpoint_bits, n_index_bits, { n_delta_bits per component },
 * { bitfields... } }.
 *
 * Bitfield layout: { endpoint, component, offset, n_bits, reverse }. The
 * bitfields are listed in the order their bits appear in the block and each
 * list ends with { -1 }.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
| |
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 *
 * The table is indexed by partition number. The decoders read the subset of
 * texel t as (entry >> (t * 2)) & 3, so texel 0 occupies the two least
 * significant bits.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
| |
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 *
 * As with the two-subset table, it is indexed by partition number and holds
 * 2 bits per texel, with texel 0 in the two least significant bits.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
| |
/*
 * Texel numbers of the anchor texels, indexed by [row][partition number].
 * An anchor texel's index is stored with one bit fewer than the others.
 * Texel 0 is always an anchor and is therefore not listed here. Row 0 is
 * consulted for two-subset modes, rows 1 and 2 for three-subset modes.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
| |
/*
 * Reads an n_bits wide little-endian bit field from a byte buffer.
 *
 * block:  buffer to read from.
 * offset: position of the field's least significant bit, counted in bits
 *         from bit 0 of block[0].
 * n_bits: width of the field. A width of zero (or less) yields 0 without
 *         touching the buffer, so an offset that sits exactly at the end of
 *         the buffer is never dereferenced for an empty field.
 *
 * Returns the field value. The field may straddle any number of bytes.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   unsigned result = 0;
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int shift = 0;

   while (n_bits > 0) {
      /* Take whatever is left of the current byte, capped at the number of
       * bits still wanted.
       */
      const int available = 8 - bit_index;
      const int chunk = n_bits < available ? n_bits : available;
      const unsigned mask = (1u << chunk) - 1u;

      /* Work in unsigned arithmetic so that no shift can reach the sign
       * bit of an int.
       */
      result |= (((unsigned) block[byte_index] >> bit_index) & mask) << shift;

      n_bits -= chunk;
      shift += chunk;

      /* Every byte after the first is consumed from its bit 0. */
      byte_index++;
      bit_index = 0;
   }

   return (int) result;
}
| |
/*
 * Expands an n-bit quantity into a byte by placing it in the most
 * significant bits and repeating its bit pattern to fill the remaining
 * least significant bits.
 *
 * byte:   the value to expand, held in the low n_bits bits.
 * n_bits: number of significant bits in byte.
 *
 * For widths of 4 to 8 bits a single repetition fills the byte. Narrower
 * widths need the pattern repeated several times; handling them in a loop
 * avoids shifting by a negative amount, which is undefined behaviour.
 * Widths of 8 or more return the byte unchanged and a non-positive width
 * returns 0.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   unsigned aligned;
   unsigned result;
   int shift;

   if (n_bits <= 0)
      return 0;

   if (n_bits >= 8)
      return byte;

   /* Move the value to the top of the byte, then OR in copies of it moved
    * down by one field width at a time until the whole byte is covered.
    */
   aligned = (unsigned) byte << (8 - n_bits);
   result = aligned;

   for (shift = n_bits; shift < 8; shift += n_bits)
      result |= aligned >> shift;

   return (uint8_t) result;
}
| |
| static int |
| extract_unorm_endpoints(const struct bptc_unorm_mode *mode, |
| const uint8_t *block, |
| int bit_offset, |
| uint8_t endpoints[][4]) |
| { |
| int component; |
| int subset; |
| int endpoint; |
| int pbit; |
| int n_components; |
| |
| /* Extract each color component */ |
| for (component = 0; component < 3; component++) { |
| for (subset = 0; subset < mode->n_subsets; subset++) { |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| endpoints[subset * 2 + endpoint][component] = |
| extract_bits(block, bit_offset, mode->n_color_bits); |
| bit_offset += mode->n_color_bits; |
| } |
| } |
| } |
| |
| /* Extract the alpha values */ |
| if (mode->n_alpha_bits > 0) { |
| for (subset = 0; subset < mode->n_subsets; subset++) { |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| endpoints[subset * 2 + endpoint][3] = |
| extract_bits(block, bit_offset, mode->n_alpha_bits); |
| bit_offset += mode->n_alpha_bits; |
| } |
| } |
| |
| n_components = 4; |
| } else { |
| for (subset = 0; subset < mode->n_subsets; subset++) |
| for (endpoint = 0; endpoint < 2; endpoint++) |
| endpoints[subset * 2 + endpoint][3] = 255; |
| |
| n_components = 3; |
| } |
| |
| /* Add in the p-bits */ |
| if (mode->has_endpoint_pbits) { |
| for (subset = 0; subset < mode->n_subsets; subset++) { |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| pbit = extract_bits(block, bit_offset, 1); |
| bit_offset += 1; |
| |
| for (component = 0; component < n_components; component++) { |
| endpoints[subset * 2 + endpoint][component] <<= 1; |
| endpoints[subset * 2 + endpoint][component] |= pbit; |
| } |
| } |
| } |
| } else if (mode->has_shared_pbits) { |
| for (subset = 0; subset < mode->n_subsets; subset++) { |
| pbit = extract_bits(block, bit_offset, 1); |
| bit_offset += 1; |
| |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| for (component = 0; component < n_components; component++) { |
| endpoints[subset * 2 + endpoint][component] <<= 1; |
| endpoints[subset * 2 + endpoint][component] |= pbit; |
| } |
| } |
| } |
| } |
| |
| /* Expand the n-bit values to a byte */ |
| for (subset = 0; subset < mode->n_subsets; subset++) { |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| for (component = 0; component < 3; component++) { |
| endpoints[subset * 2 + endpoint][component] = |
| expand_component(endpoints[subset * 2 + endpoint][component], |
| mode->n_color_bits + |
| mode->has_endpoint_pbits + |
| mode->has_shared_pbits); |
| } |
| |
| if (mode->n_alpha_bits > 0) { |
| endpoints[subset * 2 + endpoint][3] = |
| expand_component(endpoints[subset * 2 + endpoint][3], |
| mode->n_alpha_bits + |
| mode->has_endpoint_pbits + |
| mode->has_shared_pbits); |
| } |
| } |
| } |
| |
| return bit_offset; |
| } |
| |
| static bool |
| is_anchor(int n_subsets, |
| int partition_num, |
| int texel) |
| { |
| if (texel == 0) |
| return true; |
| |
| switch (n_subsets) { |
| case 1: |
| return false; |
| case 2: |
| return anchor_indices[0][partition_num] == texel; |
| case 3: |
| return (anchor_indices[1][partition_num] == texel || |
| anchor_indices[2][partition_num] == texel); |
| default: |
| assert(false); |
| return false; |
| } |
| } |
| |
| static int |
| count_anchors_before_texel(int n_subsets, |
| int partition_num, |
| int texel) |
| { |
| int count = 1; |
| |
| if (texel == 0) |
| return 0; |
| |
| switch (n_subsets) { |
| case 1: |
| break; |
| case 2: |
| if (texel > anchor_indices[0][partition_num]) |
| count++; |
| break; |
| case 3: |
| if (texel > anchor_indices[1][partition_num]) |
| count++; |
| if (texel > anchor_indices[2][partition_num]) |
| count++; |
| break; |
| default: |
| assert(false); |
| return 0; |
| } |
| |
| return count; |
| } |
| |
/*
 * Blends two endpoint values using the weight selected by a texel index.
 *
 * a, b:       the two endpoint values.
 * index:      the texel's index, selecting a weight from the table.
 * index_bits: width of the index (2, 3 or 4), selecting the weight table.
 *
 * Weights run from 0 (all a) to 64 (all b); the result is rounded by
 * adding half of the divisor before shifting.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] = {
      0, 9, 18, 27, 37, 46, 55, 64
   };
   static const uint8_t four_bit_weights[] = {
      0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
   };
   /* Indexed by index width; there are no tables for 0 or 1 bits. */
   static const uint8_t *const tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int32_t weight_b = tables[index_bits][index];
   const int32_t weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
| |
/*
 * Applies the block's rotation to a decoded RGBA texel in place.
 *
 * rotation: 0 leaves the texel untouched; 1, 2 and 3 exchange the alpha
 *           channel with channel 0, 1 and 2 respectively.
 * result:   the four-byte texel to modify.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *channel = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *channel;
      *channel = alpha;
   }
}
| |
| static void |
| fetch_rgba_unorm_from_block(const uint8_t *block, |
| uint8_t *result, |
| int texel) |
| { |
| int mode_num = ffs(block[0]); |
| const struct bptc_unorm_mode *mode; |
| int bit_offset, secondary_bit_offset; |
| int partition_num; |
| int subset_num; |
| int rotation; |
| int index_selection; |
| int index_bits; |
| int indices[2]; |
| int index; |
| int anchors_before_texel; |
| bool anchor; |
| uint8_t endpoints[3 * 2][4]; |
| uint32_t subsets; |
| int component; |
| |
| if (mode_num == 0) { |
| /* According to the spec this mode is reserved and shouldn't be used. */ |
| memset(result, 0, 3); |
| result[3] = 0xff; |
| return; |
| } |
| |
| mode = bptc_unorm_modes + mode_num - 1; |
| bit_offset = mode_num; |
| |
| partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); |
| bit_offset += mode->n_partition_bits; |
| |
| switch (mode->n_subsets) { |
| case 1: |
| subsets = 0; |
| break; |
| case 2: |
| subsets = partition_table1[partition_num]; |
| break; |
| case 3: |
| subsets = partition_table2[partition_num]; |
| break; |
| default: |
| assert(false); |
| return; |
| } |
| |
| if (mode->has_rotation_bits) { |
| rotation = extract_bits(block, bit_offset, 2); |
| bit_offset += 2; |
| } else { |
| rotation = 0; |
| } |
| |
| if (mode->has_index_selection_bit) { |
| index_selection = extract_bits(block, bit_offset, 1); |
| bit_offset++; |
| } else { |
| index_selection = 0; |
| } |
| |
| bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); |
| |
| anchors_before_texel = count_anchors_before_texel(mode->n_subsets, |
| partition_num, texel); |
| |
| /* Calculate the offset to the secondary index */ |
| secondary_bit_offset = (bit_offset + |
| BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - |
| mode->n_subsets + |
| mode->n_secondary_index_bits * texel - |
| anchors_before_texel); |
| |
| /* Calculate the offset to the primary index for this texel */ |
| bit_offset += mode->n_index_bits * texel - anchors_before_texel; |
| |
| subset_num = (subsets >> (texel * 2)) & 3; |
| |
| anchor = is_anchor(mode->n_subsets, partition_num, texel); |
| |
| index_bits = mode->n_index_bits; |
| if (anchor) |
| index_bits--; |
| indices[0] = extract_bits(block, bit_offset, index_bits); |
| |
| if (mode->n_secondary_index_bits) { |
| index_bits = mode->n_secondary_index_bits; |
| if (anchor) |
| index_bits--; |
| indices[1] = extract_bits(block, secondary_bit_offset, index_bits); |
| } |
| |
| index = indices[index_selection]; |
| index_bits = (index_selection ? |
| mode->n_secondary_index_bits : |
| mode->n_index_bits); |
| |
| for (component = 0; component < 3; component++) |
| result[component] = interpolate(endpoints[subset_num * 2][component], |
| endpoints[subset_num * 2 + 1][component], |
| index, |
| index_bits); |
| |
| /* Alpha uses the opposite index from the color components */ |
| if (mode->n_secondary_index_bits && !index_selection) { |
| index = indices[1]; |
| index_bits = mode->n_secondary_index_bits; |
| } else { |
| index = indices[0]; |
| index_bits = mode->n_index_bits; |
| } |
| |
| result[3] = interpolate(endpoints[subset_num * 2][3], |
| endpoints[subset_num * 2 + 1][3], |
| index, |
| index_bits); |
| |
| apply_rotation(rotation, result); |
| } |
| |
| #ifdef BPTC_BLOCK_DECODE |
| static void |
| decompress_rgba_unorm_block(int src_width, int src_height, |
| const uint8_t *block, |
| uint8_t *dst_row, int dst_rowstride) |
| { |
| int mode_num = ffs(block[0]); |
| const struct bptc_unorm_mode *mode; |
| int bit_offset, secondary_bit_offset; |
| int partition_num; |
| int subset_num; |
| int rotation; |
| int index_selection; |
| int index_bits; |
| int indices[2]; |
| int index; |
| int anchors_before_texel; |
| bool anchor; |
| uint8_t endpoints[3 * 2][4]; |
| uint32_t subsets; |
| int component; |
| unsigned x, y; |
| |
| if (mode_num == 0) { |
| /* According to the spec this mode is reserved and shouldn't be used. */ |
| for(y = 0; y < src_height; y += 1) { |
| uint8_t *result = dst_row; |
| memset(result, 0, 4 * src_width); |
| for(x = 0; x < src_width; x += 1) { |
| result[3] = 0xff; |
| result += 4; |
| } |
| dst_row += dst_rowstride; |
| } |
| return; |
| } |
| |
| mode = bptc_unorm_modes + mode_num - 1; |
| bit_offset = mode_num; |
| |
| partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); |
| bit_offset += mode->n_partition_bits; |
| |
| switch (mode->n_subsets) { |
| case 1: |
| subsets = 0; |
| break; |
| case 2: |
| subsets = partition_table1[partition_num]; |
| break; |
| case 3: |
| subsets = partition_table2[partition_num]; |
| break; |
| default: |
| assert(false); |
| return; |
| } |
| |
| if (mode->has_rotation_bits) { |
| rotation = extract_bits(block, bit_offset, 2); |
| bit_offset += 2; |
| } else { |
| rotation = 0; |
| } |
| |
| if (mode->has_index_selection_bit) { |
| index_selection = extract_bits(block, bit_offset, 1); |
| bit_offset++; |
| } else { |
| index_selection = 0; |
| } |
| |
| bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); |
| |
| for(y = 0; y < src_height; y += 1) { |
| uint8_t *result = dst_row; |
| for(x = 0; x < src_width; x += 1) { |
| int texel; |
| texel = x + y * 4; |
| |
| anchors_before_texel = count_anchors_before_texel(mode->n_subsets, |
| partition_num, |
| texel); |
| |
| /* Calculate the offset to the secondary index */ |
| secondary_bit_offset = (bit_offset + |
| BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - |
| mode->n_subsets + |
| mode->n_secondary_index_bits * texel - |
| anchors_before_texel); |
| |
| /* Calculate the offset to the primary index for this texel */ |
| bit_offset += mode->n_index_bits * texel - anchors_before_texel; |
| |
| subset_num = (subsets >> (texel * 2)) & 3; |
| |
| anchor = is_anchor(mode->n_subsets, partition_num, texel); |
| |
| index_bits = mode->n_index_bits; |
| if (anchor) |
| index_bits--; |
| indices[0] = extract_bits(block, bit_offset, index_bits); |
| |
| if (mode->n_secondary_index_bits) { |
| index_bits = mode->n_secondary_index_bits; |
| if (anchor) |
| index_bits--; |
| indices[1] = extract_bits(block, secondary_bit_offset, index_bits); |
| } |
| |
| index = indices[index_selection]; |
| index_bits = (index_selection ? |
| mode->n_secondary_index_bits : |
| mode->n_index_bits); |
| |
| for (component = 0; component < 3; component++) |
| result[component] = interpolate(endpoints[subset_num * 2][component], |
| endpoints[subset_num * 2 + 1][component], |
| index, |
| index_bits); |
| |
| /* Alpha uses the opposite index from the color components */ |
| if (mode->n_secondary_index_bits && !index_selection) { |
| index = indices[1]; |
| index_bits = mode->n_secondary_index_bits; |
| } else { |
| index = indices[0]; |
| index_bits = mode->n_index_bits; |
| } |
| |
| result[3] = interpolate(endpoints[subset_num * 2][3], |
| endpoints[subset_num * 2 + 1][3], |
| index, |
| index_bits); |
| |
| apply_rotation(rotation, result); |
| result += 4; |
| } |
| dst_row += dst_rowstride; |
| } |
| } |
| |
| static void |
| decompress_rgba_unorm(int width, int height, |
| const uint8_t *src, int src_rowstride, |
| uint8_t *dst, int dst_rowstride) |
| { |
| int src_row_diff; |
| int y, x; |
| |
| if (src_rowstride >= width * 4) |
| src_row_diff = src_rowstride - ((width + 3) & ~3) * 4; |
| else |
| src_row_diff = 0; |
| |
| for (y = 0; y < height; y += BLOCK_SIZE) { |
| for (x = 0; x < width; x += BLOCK_SIZE) { |
| decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), |
| MIN2(height - y, BLOCK_SIZE), |
| src, |
| dst + x * 4 + y * dst_rowstride, |
| dst_rowstride); |
| src += BLOCK_BYTES; |
| } |
| src += src_row_diff; |
| } |
| } |
| #endif // BPTC_BLOCK_DECODE |
| |
/*
 * Interprets the low n_bits bits of value as a two's complement number and
 * returns it widened to 32 bits. Any bits above the field are ignored.
 *
 * value:  holds the field in its least significant bits.
 * n_bits: width of the field; must be between 1 and 31.
 *
 * Written with mask, xor and subtract rather than a shift pair, so the
 * result does not depend on how the compiler shifts negative numbers or
 * converts out-of-range unsigned values to signed.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   assert(n_bits > 0 && n_bits < 32);

   const int32_t sign_bit = (int32_t) 1 << (n_bits - 1);
   /* sign_bit | (sign_bit - 1) sets exactly the low n_bits bits */
   const int32_t field = value & (sign_bit | (sign_bit - 1));

   /* Flipping the sign bit and then subtracting it leaves non-negative
    * fields unchanged and takes 2^n_bits off the negative ones.
    */
   return (field ^ sign_bit) - sign_bit;
}
| |
/*
 * Rescales a signed endpoint value from n_endpoint_bits of precision to
 * the range -0x7fff..0x7fff.
 *
 * value:           the sign-extended endpoint value.
 * n_endpoint_bits: precision the value was stored with.
 *
 * Values stored with 16 or more bits, and zero, are returned unchanged.
 * The sign is handled separately from the magnitude, so the result is
 * symmetric around zero.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   /* Already at full precision, or nothing to scale */
   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   const bool negative = value < 0;
   const int magnitude = negative ? -value : value;
   const int largest = (1 << (n_endpoint_bits - 1)) - 1;
   int scaled;

   if (magnitude >= largest) {
      /* The largest magnitude maps exactly onto the end of the range */
      scaled = 0x7fff;
   } else {
      scaled = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);
   }

   return negative ? -scaled : scaled;
}
| |
/*
 * Rescales an unsigned endpoint value from n_endpoint_bits of precision to
 * the range 0..0xffff.
 *
 * value:           the endpoint value as read from the block.
 * n_endpoint_bits: precision the value was stored with.
 *
 * Values stored with 15 or more bits, and zero, are returned unchanged.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   /* Already at full precision, or nothing to scale */
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   /* The all-ones value maps exactly onto the top of the range */
   if (value == (1 << n_endpoint_bits) - 1)
      return 0xffff;

   const int rounded = (value << 15) + 0x4000;

   return rounded >> (n_endpoint_bits - 1);
}
| |
| static int |
| extract_float_endpoints(const struct bptc_float_mode *mode, |
| const uint8_t *block, |
| int bit_offset, |
| int32_t endpoints[][3], |
| bool is_signed) |
| { |
| const struct bptc_float_bitfield *bitfield; |
| int endpoint, component; |
| int n_endpoints; |
| int value; |
| int i; |
| |
| if (mode->n_partition_bits) |
| n_endpoints = 4; |
| else |
| n_endpoints = 2; |
| |
| memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3); |
| |
| for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) { |
| value = extract_bits(block, bit_offset, bitfield->n_bits); |
| bit_offset += bitfield->n_bits; |
| |
| if (bitfield->reverse) { |
| for (i = 0; i < bitfield->n_bits; i++) { |
| if (value & (1 << i)) |
| endpoints[bitfield->endpoint][bitfield->component] |= |
| 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset); |
| } |
| } else { |
| endpoints[bitfield->endpoint][bitfield->component] |= |
| value << bitfield->offset; |
| } |
| } |
| |
| if (mode->transformed_endpoints) { |
| /* The endpoints are specified as signed offsets from e0 */ |
| for (endpoint = 1; endpoint < n_endpoints; endpoint++) { |
| for (component = 0; component < 3; component++) { |
| value = sign_extend(endpoints[endpoint][component], |
| mode->n_delta_bits[component]); |
| endpoints[endpoint][component] = |
| ((endpoints[0][component] + value) & |
| ((1 << mode->n_endpoint_bits) - 1)); |
| } |
| } |
| } |
| |
| if (is_signed) { |
| for (endpoint = 0; endpoint < n_endpoints; endpoint++) { |
| for (component = 0; component < 3; component++) { |
| value = sign_extend(endpoints[endpoint][component], |
| mode->n_endpoint_bits); |
| endpoints[endpoint][component] = |
| signed_unquantize(value, mode->n_endpoint_bits); |
| } |
| } |
| } else { |
| for (endpoint = 0; endpoint < n_endpoints; endpoint++) { |
| for (component = 0; component < 3; component++) { |
| endpoints[endpoint][component] = |
| unsigned_unquantize(endpoints[endpoint][component], |
| mode->n_endpoint_bits); |
| } |
| } |
| } |
| |
| return bit_offset; |
| } |
| |
/*
 * Final scaling step for unsigned float blocks: multiplies the interpolated
 * value by 31/64. The caller passes the result to the half-float
 * conversion.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
| |
/*
 * Final scaling step for signed float blocks: multiplies the magnitude of
 * the interpolated value by 31/32 and, for negative inputs, sets bit 15 of
 * the result instead of negating it. The caller passes the result to the
 * half-float conversion.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = (value < 0 ? -value : value) * 31 / 32;

   return value < 0 ? (magnitude | 0x8000) : magnitude;
}
| |
| static void |
| fetch_rgb_float_from_block(const uint8_t *block, |
| float *result, |
| int texel, |
| bool is_signed) |
| { |
| int mode_num; |
| const struct bptc_float_mode *mode; |
| int bit_offset; |
| int partition_num; |
| int subset_num; |
| int index_bits; |
| int index; |
| int anchors_before_texel; |
| int32_t endpoints[2 * 2][3]; |
| uint32_t subsets; |
| int n_subsets; |
| int component; |
| int32_t value; |
| |
| if (block[0] & 0x2) { |
| mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; |
| bit_offset = 5; |
| } else { |
| mode_num = block[0] & 3; |
| bit_offset = 2; |
| } |
| |
| mode = bptc_float_modes + mode_num; |
| |
| if (mode->reserved) { |
| memset(result, 0, sizeof result[0] * 3); |
| result[3] = 1.0f; |
| return; |
| } |
| |
| bit_offset = extract_float_endpoints(mode, block, bit_offset, |
| endpoints, is_signed); |
| |
| if (mode->n_partition_bits) { |
| partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); |
| bit_offset += mode->n_partition_bits; |
| |
| subsets = partition_table1[partition_num]; |
| n_subsets = 2; |
| } else { |
| partition_num = 0; |
| subsets = 0; |
| n_subsets = 1; |
| } |
| |
| anchors_before_texel = |
| count_anchors_before_texel(n_subsets, partition_num, texel); |
| |
| /* Calculate the offset to the primary index for this texel */ |
| bit_offset += mode->n_index_bits * texel - anchors_before_texel; |
| |
| subset_num = (subsets >> (texel * 2)) & 3; |
| |
| index_bits = mode->n_index_bits; |
| if (is_anchor(n_subsets, partition_num, texel)) |
| index_bits--; |
| index = extract_bits(block, bit_offset, index_bits); |
| |
| for (component = 0; component < 3; component++) { |
| value = interpolate(endpoints[subset_num * 2][component], |
| endpoints[subset_num * 2 + 1][component], |
| index, |
| mode->n_index_bits); |
| |
| if (is_signed) |
| value = finish_signed_unquantize(value); |
| else |
| value = finish_unsigned_unquantize(value); |
| |
| result[component] = _mesa_half_to_float(value); |
| } |
| |
| result[3] = 1.0f; |
| } |
| |
| #ifdef BPTC_BLOCK_DECODE |
| static void |
| decompress_rgb_float_block(unsigned src_width, unsigned src_height, |
| const uint8_t *block, |
| float *dst_row, unsigned dst_rowstride, |
| bool is_signed) |
| { |
| int mode_num; |
| const struct bptc_float_mode *mode; |
| int bit_offset; |
| int partition_num; |
| int subset_num; |
| int index_bits; |
| int index; |
| int anchors_before_texel; |
| int32_t endpoints[2 * 2][3]; |
| uint32_t subsets; |
| int n_subsets; |
| int component; |
| int32_t value; |
| unsigned x, y; |
| |
| if (block[0] & 0x2) { |
| mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; |
| bit_offset = 5; |
| } else { |
| mode_num = block[0] & 3; |
| bit_offset = 2; |
| } |
| |
| mode = bptc_float_modes + mode_num; |
| |
| if (mode->reserved) { |
| for(y = 0; y < src_height; y += 1) { |
| float *result = dst_row; |
| memset(result, 0, sizeof result[0] * 4 * src_width); |
| for(x = 0; x < src_width; x += 1) { |
| result[3] = 1.0f; |
| result += 4; |
| } |
| dst_row += dst_rowstride / sizeof dst_row[0]; |
| } |
| return; |
| } |
| |
| bit_offset = extract_float_endpoints(mode, block, bit_offset, |
| endpoints, is_signed); |
| |
| if (mode->n_partition_bits) { |
| partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); |
| bit_offset += mode->n_partition_bits; |
| |
| subsets = partition_table1[partition_num]; |
| n_subsets = 2; |
| } else { |
| partition_num = 0; |
| subsets = 0; |
| n_subsets = 1; |
| } |
| |
| for(y = 0; y < src_height; y += 1) { |
| float *result = dst_row; |
| for(x = 0; x < src_width; x += 1) { |
| int texel; |
| |
| texel = x + y * 4; |
| |
| anchors_before_texel = |
| count_anchors_before_texel(n_subsets, partition_num, texel); |
| |
| /* Calculate the offset to the primary index for this texel */ |
| bit_offset += mode->n_index_bits * texel - anchors_before_texel; |
| |
| subset_num = (subsets >> (texel * 2)) & 3; |
| |
| index_bits = mode->n_index_bits; |
| if (is_anchor(n_subsets, partition_num, texel)) |
| index_bits--; |
| index = extract_bits(block, bit_offset, index_bits); |
| |
| for (component = 0; component < 3; component++) { |
| value = interpolate(endpoints[subset_num * 2][component], |
| endpoints[subset_num * 2 + 1][component], |
| index, |
| mode->n_index_bits); |
| |
| if (is_signed) |
| value = finish_signed_unquantize(value); |
| else |
| value = finish_unsigned_unquantize(value); |
| |
| result[component] = _mesa_half_to_float(value); |
| } |
| |
| result[3] = 1.0f; |
| result += 4; |
| } |
| dst_row += dst_rowstride / sizeof dst_row[0]; |
| } |
| } |
| |
| static void |
| decompress_rgb_float(int width, int height, |
| const uint8_t *src, int src_rowstride, |
| float *dst, int dst_rowstride, bool is_signed) |
| { |
| int src_row_diff; |
| int y, x; |
| |
| if (src_rowstride >= width * 4) |
| src_row_diff = src_rowstride - ((width + 3) & ~3) * 4; |
| else |
| src_row_diff = 0; |
| |
| for (y = 0; y < height; y += BLOCK_SIZE) { |
| for (x = 0; x < width; x += BLOCK_SIZE) { |
| decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), |
| MIN2(height - y, BLOCK_SIZE), |
| src, |
| (dst + x * 4 + |
| (y * dst_rowstride / sizeof dst[0])), |
| dst_rowstride, is_signed); |
| src += BLOCK_BYTES; |
| } |
| src += src_row_diff; |
| } |
| } |
| #endif // BPTC_BLOCK_DECODE |
| |
| static void |
| write_bits(struct bit_writer *writer, int n_bits, int value) |
| { |
| do { |
| if (n_bits + writer->pos >= 8) { |
| *(writer->dst++) = writer->buf | (value << writer->pos); |
| writer->buf = 0; |
| value >>= (8 - writer->pos); |
| n_bits -= (8 - writer->pos); |
| writer->pos = 0; |
| } else { |
| writer->buf |= value << writer->pos; |
| writer->pos += n_bits; |
| break; |
| } |
| } while (n_bits > 0); |
| } |
| |
/*
 * Computes the average luminance and the average alpha of a region of
 * four-byte texels.
 *
 * width, height:     size of the region in texels (must not be empty as the
 *                    sums are divided by width * height)
 * src:               first texel of the region
 * src_rowstride:     distance between rows in bytes
 * average_luminance: receives the mean of the sum of the first three bytes
 *                    of each texel
 * average_alpha:     receives the mean of the fourth byte of each texel
 *
 * Both averages use truncating integer division.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   int n_texels = width * height;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
| |
/*
 * Picks two RGBA endpoints for a region of four-byte texels.
 *
 * width, height:     size of the region in texels (must not be empty)
 * src:               first texel of the region
 * src_rowstride:     distance between rows in bytes
 * average_luminance: threshold used to split the texels by luminance (the
 *                    sum of the first three bytes)
 * average_alpha:     threshold used to split the texels by alpha (the
 *                    fourth byte)
 * endpoints:         receives the two endpoints
 *
 * The colour and the alpha endpoints are chosen independently. In each case
 * the texels below the threshold are averaged to give endpoint 0 and the
 * remaining ones to give endpoint 1. If all of the texels fall on the same
 * side then both endpoints are set to the average of the whole region.
 * Finally the endpoints are swapped if needed so that the first texel is
 * closer to endpoint 0.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha split must look at the alpha channel (p[3]), not at
          * the blue channel */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* Everything is on one side; avoid dividing by an empty count */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
| |
| static void |
| write_rgb_indices_unorm(struct bit_writer *writer, |
| int src_width, int src_height, |
| const uint8_t *src, int src_rowstride, |
| uint8_t endpoints[][4]) |
| { |
| int luminance; |
| int endpoint_luminances[2]; |
| int endpoint; |
| int index; |
| int y, x; |
| |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| endpoint_luminances[endpoint] = |
| endpoints[endpoint][0] + |
| endpoints[endpoint][1] + |
| endpoints[endpoint][2]; |
| } |
| |
| /* If the endpoints have the same luminance then we'll just use index 0 for |
| * all of the texels */ |
| if (endpoint_luminances[0] == endpoint_luminances[1]) { |
| write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0); |
| return; |
| } |
| |
| for (y = 0; y < src_height; y++) { |
| for (x = 0; x < src_width; x++) { |
| luminance = src[0] + src[1] + src[2]; |
| |
| index = ((luminance - endpoint_luminances[0]) * 3 / |
| (endpoint_luminances[1] - endpoint_luminances[0])); |
| if (index < 0) |
| index = 0; |
| else if (index > 3) |
| index = 3; |
| |
| assert(x != 0 || y != 0 || index < 2); |
| |
| write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index); |
| |
| src += 4; |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_width < BLOCK_SIZE) |
| write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0); |
| |
| src += src_rowstride - src_width * 4; |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_height < BLOCK_SIZE) |
| write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); |
| } |
| |
| static void |
| write_alpha_indices_unorm(struct bit_writer *writer, |
| int src_width, int src_height, |
| const uint8_t *src, int src_rowstride, |
| uint8_t endpoints[][4]) |
| { |
| int index; |
| int y, x; |
| |
| /* If the endpoints have the same alpha then we'll just use index 0 for |
| * all of the texels */ |
| if (endpoints[0][3] == endpoints[1][3]) { |
| write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0); |
| return; |
| } |
| |
| for (y = 0; y < src_height; y++) { |
| for (x = 0; x < src_width; x++) { |
| index = (((int) src[3] - (int) endpoints[0][3]) * 7 / |
| ((int) endpoints[1][3] - endpoints[0][3])); |
| if (index < 0) |
| index = 0; |
| else if (index > 7) |
| index = 7; |
| |
| assert(x != 0 || y != 0 || index < 4); |
| |
| /* The first index has one less bit */ |
| write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index); |
| |
| src += 4; |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_width < BLOCK_SIZE) |
| write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0); |
| |
| src += src_rowstride - src_width * 4; |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_height < BLOCK_SIZE) |
| write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); |
| } |
| |
| static void |
| compress_rgba_unorm_block(int src_width, int src_height, |
| const uint8_t *src, int src_rowstride, |
| uint8_t *dst) |
| { |
| int average_luminance, average_alpha; |
| uint8_t endpoints[2][4]; |
| struct bit_writer writer; |
| int component, endpoint; |
| |
| get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride, |
| &average_luminance, &average_alpha); |
| get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride, |
| average_luminance, average_alpha, |
| endpoints); |
| |
| writer.dst = dst; |
| writer.pos = 0; |
| writer.buf = 0; |
| |
| write_bits(&writer, 5, 0x10); /* mode 4 */ |
| write_bits(&writer, 2, 0); /* rotation 0 */ |
| write_bits(&writer, 1, 0); /* index selection bit */ |
| |
| /* Write the color endpoints */ |
| for (component = 0; component < 3; component++) |
| for (endpoint = 0; endpoint < 2; endpoint++) |
| write_bits(&writer, 5, endpoints[endpoint][component] >> 3); |
| |
| /* Write the alpha endpoints */ |
| for (endpoint = 0; endpoint < 2; endpoint++) |
| write_bits(&writer, 6, endpoints[endpoint][3] >> 2); |
| |
| write_rgb_indices_unorm(&writer, |
| src_width, src_height, |
| src, src_rowstride, |
| endpoints); |
| write_alpha_indices_unorm(&writer, |
| src_width, src_height, |
| src, src_rowstride, |
| endpoints); |
| } |
| |
| static void |
| compress_rgba_unorm(int width, int height, |
| const uint8_t *src, int src_rowstride, |
| uint8_t *dst, int dst_rowstride) |
| { |
| int dst_row_diff; |
| int y, x; |
| |
| if (dst_rowstride >= width * 4) |
| dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; |
| else |
| dst_row_diff = 0; |
| |
| for (y = 0; y < height; y += BLOCK_SIZE) { |
| for (x = 0; x < width; x += BLOCK_SIZE) { |
| compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), |
| MIN2(height - y, BLOCK_SIZE), |
| src + x * 4 + y * src_rowstride, |
| src_rowstride, |
| dst); |
| dst += BLOCK_BYTES; |
| } |
| dst += dst_row_diff; |
| } |
| } |
| |
/*
 * Returns the average luminance of a region of three-float texels, where
 * the luminance of a texel is the sum of its three components.
 *
 * width, height: size of the region in texels (must not be empty as the sum
 *                is divided by width * height)
 * src:           first texel of the region
 * src_rowstride: distance between rows in bytes
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip the padding between the end of this row and the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
| |
/*
 * Clamps value to the range used for endpoints: at most 65504, and at
 * least -65504 when is_signed is set or 0 otherwise. Values within the
 * range are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
| |
/*
 * Picks two RGB endpoints for a region of three-float texels.
 *
 * width, height:     size of the region in texels (must not be empty)
 * src:               first texel of the region
 * src_rowstride:     distance between rows in bytes
 * average_luminance: threshold used to split the texels by luminance (the
 *                    sum of the three components)
 * endpoints:         receives the two endpoints
 * is_signed:         forwarded to clamp_value to select the clamping range
 *
 * The texels below the threshold are averaged to give endpoint 0 and the
 * remaining ones to give endpoint 1. If all of the texels fall on the same
 * side then both endpoints are set to the average of the whole region. The
 * endpoints are then clamped and finally swapped if needed so that the
 * first texel is closer to endpoint 0.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const float *texel = src;
   float sums[2][3];
   float luminances[2];
   float luminance, midpoint, swap;
   int n_texels = width * height;
   int n_left = 0;
   int side, c;
   int row, col;

   memset(sums, 0, sizeof sums);

   /* Accumulate the texels on either side of the average luminance */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         luminance = texel[0] + texel[1] + texel[2];

         if (luminance < average_luminance) {
            side = 0;
            n_left++;
         } else {
            side = 1;
         }

         for (c = 0; c < 3; c++)
            sums[side][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (c = 0; c < 3; c++) {
      if (n_left == 0 || n_left == n_texels) {
         /* Everything is on one side; avoid dividing by an empty count */
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
      } else {
         endpoints[0][c] = sums[0][c] / n_left;
         endpoints[1][c] = sums[1][c] / (n_texels - n_left);
      }
   }

   /* Keep the endpoints within the range accepted by clamp_value, which
    * also removes infinities */
   for (side = 0; side < 2; side++) {
      for (c = 0; c < 3; c++)
         endpoints[side][c] = clamp_value(endpoints[side][c], is_signed);
   }

   for (side = 0; side < 2; side++) {
      luminances[side] =
         endpoints[side][0] +
         endpoints[side][1] +
         endpoints[side][2];
   }
   midpoint = (luminances[0] + luminances[1]) / 2.0f;

   /* The first texel has to end up nearer to endpoint 0 so that the
    * most-significant bit of its index is zero. Swap the endpoints if that
    * isn't the case. */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (luminances[0] <= midpoint)) {
      for (c = 0; c < 3; c++) {
         swap = endpoints[0][c];
         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = swap;
      }
   }
}
| |
| static void |
| write_rgb_indices_float(struct bit_writer *writer, |
| int src_width, int src_height, |
| const float *src, int src_rowstride, |
| float endpoints[][3]) |
| { |
| float luminance; |
| float endpoint_luminances[2]; |
| int endpoint; |
| int index; |
| int y, x; |
| |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| endpoint_luminances[endpoint] = |
| endpoints[endpoint][0] + |
| endpoints[endpoint][1] + |
| endpoints[endpoint][2]; |
| } |
| |
| /* If the endpoints have the same luminance then we'll just use index 0 for |
| * all of the texels */ |
| if (endpoint_luminances[0] == endpoint_luminances[1]) { |
| write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0); |
| return; |
| } |
| |
| for (y = 0; y < src_height; y++) { |
| for (x = 0; x < src_width; x++) { |
| luminance = src[0] + src[1] + src[2]; |
| |
| index = ((luminance - endpoint_luminances[0]) * 15 / |
| (endpoint_luminances[1] - endpoint_luminances[0])); |
| if (index < 0) |
| index = 0; |
| else if (index > 15) |
| index = 15; |
| |
| assert(x != 0 || y != 0 || index < 8); |
| |
| write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index); |
| |
| src += 3; |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_width < BLOCK_SIZE) |
| write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0); |
| |
| src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float); |
| } |
| |
| /* Pad the indices out to the block size */ |
| if (src_height < BLOCK_SIZE) |
| write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); |
| } |
| |
/*
 * Converts a float endpoint component into the integer that is stored in
 * the block.
 *
 * The value is first converted to a half float with _mesa_float_to_half.
 * For unsigned formats anything less than or equal to zero becomes 0 and
 * the half bits are scaled by 64 / 31 before dropping the low six bits. For
 * signed formats the sign bit is split off, the magnitude is scaled by
 * 32 / 31 before dropping the low six bits, and a negative result is stored
 * as the low ten bits of its negation.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative = false;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Work on the magnitude and remember the sign for later */
   if (half & 0x8000) {
      negative = true;
      half &= 0x7fff;
   }

   half = (32 * half / 31) >> 6;

   /* Negative values are kept as a ten-bit two's complement number */
   if (negative)
      half = -half & ((1 << 10) - 1);

   return half;
}
| |
| static void |
| compress_rgb_float_block(int src_width, int src_height, |
| const float *src, int src_rowstride, |
| uint8_t *dst, |
| bool is_signed) |
| { |
| float average_luminance; |
| float endpoints[2][3]; |
| struct bit_writer writer; |
| int component, endpoint; |
| int endpoint_value; |
| |
| average_luminance = |
| get_average_luminance_float(src_width, src_height, src, src_rowstride); |
| get_endpoints_float(src_width, src_height, src, src_rowstride, |
| average_luminance, endpoints, is_signed); |
| |
| writer.dst = dst; |
| writer.pos = 0; |
| writer.buf = 0; |
| |
| write_bits(&writer, 5, 3); /* mode 3 */ |
| |
| /* Write the endpoints */ |
| for (endpoint = 0; endpoint < 2; endpoint++) { |
| for (component = 0; component < 3; component++) { |
| endpoint_value = |
| get_endpoint_value(endpoints[endpoint][component], is_signed); |
| write_bits(&writer, 10, endpoint_value); |
| } |
| } |
| |
| write_rgb_indices_float(&writer, |
| src_width, src_height, |
| src, src_rowstride, |
| endpoints); |
| } |
| |
| static void |
| compress_rgb_float(int width, int height, |
| const float *src, int src_rowstride, |
| uint8_t *dst, int dst_rowstride, |
| bool is_signed) |
| { |
| int dst_row_diff; |
| int y, x; |
| |
| if (dst_rowstride >= width * 4) |
| dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; |
| else |
| dst_row_diff = 0; |
| |
| for (y = 0; y < height; y += BLOCK_SIZE) { |
| for (x = 0; x < width; x += BLOCK_SIZE) { |
| compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), |
| MIN2(height - y, BLOCK_SIZE), |
| src + x * 3 + |
| y * src_rowstride / sizeof (float), |
| src_rowstride, |
| dst, |
| is_signed); |
| dst += BLOCK_BYTES; |
| } |
| dst += dst_row_diff; |
| } |
| } |
| |
| #endif |